Diffstat (limited to 'kernel/futex.c')
-rw-r--r--  kernel/futex.c  137
1 file changed, 76 insertions(+), 61 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 248dd119a86e..b911adceb2c4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -89,36 +89,36 @@ struct futex_pi_state {
 	union futex_key key;
 };
 
-/*
- * We use this hashed waitqueue instead of a normal wait_queue_t, so
+/**
+ * struct futex_q - The hashed futex queue entry, one per waiting task
+ * @task:		the task waiting on the futex
+ * @lock_ptr:		the hash bucket lock
+ * @key:		the key the futex is hashed on
+ * @pi_state:		optional priority inheritance state
+ * @rt_waiter:		rt_waiter storage for use with requeue_pi
+ * @requeue_pi_key:	the requeue_pi target futex key
+ * @bitset:		bitset for the optional bitmasked wakeup
+ *
+ * We use this hashed waitqueue, instead of a normal wait_queue_t, so
  * we can wake only the relevant ones (hashed queues may be shared).
  *
  * A futex_q has a woken state, just like tasks have TASK_RUNNING.
  * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
  * The order of wakup is always to make the first condition true, then
- * wake up q->waiter, then make the second condition true.
+ * the second.
+ *
+ * PI futexes are typically woken before they are removed from the hash list via
+ * the rt_mutex code. See unqueue_me_pi().
  */
 struct futex_q {
 	struct plist_node list;
-	/* Waiter reference */
-	struct task_struct *task;
 
-	/* Which hash list lock to use: */
+	struct task_struct *task;
 	spinlock_t *lock_ptr;
-
-	/* Key which the futex is hashed on: */
 	union futex_key key;
-
-	/* Optional priority inheritance state: */
 	struct futex_pi_state *pi_state;
-
-	/* rt_waiter storage for requeue_pi: */
 	struct rt_mutex_waiter *rt_waiter;
-
-	/* The expected requeue pi target futex key: */
 	union futex_key *requeue_pi_key;
-
-	/* Bitset for the optional bitmasked wakeup */
 	u32 bitset;
 };
 
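As a reading aid only (not part of the patch), the woken-state test described in the new struct futex_q comment amounts to a check like the hypothetical helper below; futex_q_is_woken() is an invented name:

/* Illustrative only: the "woken" test from the comment above. */
static inline int futex_q_is_woken(struct futex_q *q)
{
	return plist_node_empty(&q->list) || q->lock_ptr == NULL;
}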
@@ -198,11 +198,12 @@ static void drop_futex_key_refs(union futex_key *key)
 }
 
 /**
- * get_futex_key - Get parameters which are the keys for a futex.
+ * get_futex_key() - Get parameters which are the keys for a futex
  * @uaddr: virtual address of the futex
  * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
  * @key: address where result is stored.
- * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE)
+ * @rw: mapping needs to be read/write (values: VERIFY_READ,
+ *      VERIFY_WRITE)
  *
  * Returns a negative error code or 0
  * The key words are stored in *key on success.
@@ -288,8 +289,8 @@ void put_futex_key(int fshared, union futex_key *key)
 	drop_futex_key_refs(key);
 }
 
-/*
- * fault_in_user_writeable - fault in user address and verify RW access
+/**
+ * fault_in_user_writeable() - Fault in user address and verify RW access
  * @uaddr: pointer to faulting user space address
  *
  * Slow path to fixup the fault we just took in the atomic write
@@ -309,8 +310,8 @@ static int fault_in_user_writeable(u32 __user *uaddr)
 
 /**
  * futex_top_waiter() - Return the highest priority waiter on a futex
- * @hb:     the hash bucket the futex_q's reside in
- * @key:    the futex key (to distinguish it from other futex futex_q's)
+ * @hb:		the hash bucket the futex_q's reside in
+ * @key:	the futex key (to distinguish it from other futex futex_q's)
  *
  * Must be called with the hb lock held.
  */
@@ -588,7 +589,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 }
 
 /**
- * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex
+ * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
  * @uaddr: the pi futex user address
  * @hb: the pi futex hash bucket
  * @key: the futex key associated with uaddr and hb
@@ -1011,9 +1012,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 
 /**
  * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
- * q: the futex_q
- * key: the key of the requeue target futex
- * hb: the hash_bucket of the requeue target futex
+ * @q: the futex_q
+ * @key: the key of the requeue target futex
+ * @hb: the hash_bucket of the requeue target futex
  *
  * During futex_requeue, with requeue_pi=1, it is possible to acquire the
  * target futex if it is uncontended or via a lock steal. Set the futex_q key
@@ -1350,6 +1351,25 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
 	return hb;
 }
 
+static inline void
+queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
+{
+	spin_unlock(&hb->lock);
+	drop_futex_key_refs(&q->key);
+}
+
+/**
+ * queue_me() - Enqueue the futex_q on the futex_hash_bucket
+ * @q:	The futex_q to enqueue
+ * @hb:	The destination hash bucket
+ *
+ * The hb->lock must be held by the caller, and is released here. A call to
+ * queue_me() is typically paired with exactly one call to unqueue_me(). The
+ * exceptions involve the PI related operations, which may use unqueue_me_pi()
+ * or nothing if the unqueue is done as part of the wake process and the unqueue
+ * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
+ * an example).
+ */
 static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 {
 	int prio;
@@ -1373,19 +1393,17 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 	spin_unlock(&hb->lock);
 }
 
-static inline void
-queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
-{
-	spin_unlock(&hb->lock);
-	drop_futex_key_refs(&q->key);
-}
-
-/*
- * queue_me and unqueue_me must be called as a pair, each
- * exactly once.  They are called with the hashed spinlock held.
- */
-
-/* Return 1 if we were still queued (ie. 0 means we were woken) */
+/**
+ * unqueue_me() - Remove the futex_q from its futex_hash_bucket
+ * @q:	The futex_q to unqueue
+ *
+ * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
+ * be paired with exactly one earlier call to queue_me().
+ *
+ * Returns:
+ *   1 - if the futex_q was still queued (and we removed unqueued it)
+ *   0 - if the futex_q was already removed by the waking thread
+ */
 static int unqueue_me(struct futex_q *q)
 {
 	spinlock_t *lock_ptr;
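To illustrate the queue_me()/unqueue_me() pairing spelled out in the new kernel-doc, here is a minimal waiter sketch (not part of the patch); example_wait_cycle() is an invented name, and the userspace value check, timeout handling and error paths are omitted:

/* Illustrative sketch: one queue_me() paired with one unqueue_me(). */
static int example_wait_cycle(struct futex_q *q)
{
	struct futex_hash_bucket *hb;

	/* Assumes q->key was already set up via get_futex_key(). */
	hb = queue_lock(q);			/* hash q->key, take hb->lock */

	set_current_state(TASK_INTERRUPTIBLE);	/* state first, per this patch */
	queue_me(q, hb);			/* enqueue; releases hb->lock */
	schedule();				/* sleep until a waker runs */
	__set_current_state(TASK_RUNNING);

	if (!unqueue_me(q))			/* 0: futex_wake() dequeued us */
		return 0;			/* genuine wakeup */
	return 1;				/* still queued: timeout/signal path */
}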
@@ -1638,17 +1656,14 @@ out:
 static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
 				struct hrtimer_sleeper *timeout)
 {
-	queue_me(q, hb);
-
 	/*
-	 * There might have been scheduling since the queue_me(), as we
-	 * cannot hold a spinlock across the get_user() in case it
-	 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
-	 * queueing ourselves into the futex hash. This code thus has to
-	 * rely on the futex_wake() code removing us from hash when it
-	 * wakes us up.
+	 * The task state is guaranteed to be set before another task can
+	 * wake it. set_current_state() is implemented using set_mb() and
+	 * queue_me() calls spin_unlock() upon completion, both serializing
+	 * access to the hash list and forcing another memory barrier.
 	 */
 	set_current_state(TASK_INTERRUPTIBLE);
+	queue_me(q, hb);
 
 	/* Arm the timer */
 	if (timeout) {
@@ -1658,8 +1673,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
 	}
 
 	/*
-	 * !plist_node_empty() is safe here without any lock.
-	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
+	 * If we have been removed from the hash list, then another task
+	 * has tried to wake us, and we can skip the call to schedule().
 	 */
 	if (likely(!plist_node_empty(&q->list))) {
 		/*
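For contrast with the reordering above (task state set before queue_me() publishes the entry on the hash list), a simplified wake-side sketch, loosely modeled on futex_wake()/wake_futex() but not part of the patch, shows why a waiter found on the list cannot miss its wakeup and why the lockless plist_node_empty() check is sufficient; example_wake_one() is an invented name:

/*
 * Illustrative only: wake one matching waiter under the hb lock,
 * loosely following wake_futex().
 */
static int example_wake_one(struct futex_hash_bucket *hb, union futex_key *key)
{
	struct futex_q *this, *next;
	struct task_struct *p;

	spin_lock(&hb->lock);
	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (!match_futex(&this->key, key))
			continue;
		p = this->task;
		get_task_struct(p);	/* the waiter may free its futex_q soon */
		plist_del(&this->list, &hb->chain);	/* first woken condition */
		smp_wmb();
		this->lock_ptr = NULL;			/* then the second */
		wake_up_state(p, TASK_INTERRUPTIBLE);
		put_task_struct(p);
		spin_unlock(&hb->lock);
		return 1;
	}
	spin_unlock(&hb->lock);
	return 0;
}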
@@ -2114,12 +2129,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 
 /**
  * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
- * @uaddr: the futex we initialyl wait on (non-pi)
+ * @uaddr: the futex we initially wait on (non-pi)
  * @fshared: whether the futexes are shared (1) or not (0). They must be
  *           the same type, no requeueing from private to shared, etc.
  * @val: the expected value of uaddr
  * @abs_time: absolute timeout
- * @bitset: 32 bit wakeup bitset set by userspace, defaults to all.
+ * @bitset: 32 bit wakeup bitset set by userspace, defaults to all
  * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
  * @uaddr2: the pi futex we will take prior to returning to user-space
  *
@@ -2246,7 +2261,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
 		res = fixup_owner(uaddr2, fshared, &q, !ret);
 		/*
 		 * If fixup_owner() returned an error, proprogate that.  If it
-		 * acquired the lock, clear our -ETIMEDOUT or -EINTR.
+		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
 		 */
 		if (res)
 			ret = (res < 0) ? res : 0;
@@ -2302,9 +2317,9 @@ out:
  */
 
 /**
- * sys_set_robust_list - set the robust-futex list head of a task
+ * sys_set_robust_list() - Set the robust-futex list head of a task
  * @head: pointer to the list-head
  * @len: length of the list-head, as userspace expects
  */
 SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
 		size_t, len)
@@ -2323,10 +2338,10 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
 }
 
 /**
- * sys_get_robust_list - get the robust-futex list head of a task
+ * sys_get_robust_list() - Get the robust-futex list head of a task
  * @pid: pid of the process [zero for current task]
  * @head_ptr: pointer to a list-head pointer, the kernel fills it in
  * @len_ptr: pointer to a length field, the kernel fills in the header size
  */
 SYSCALL_DEFINE3(get_robust_list, int, pid,
 		struct robust_list_head __user * __user *, head_ptr,
