diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-26 13:15:53 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-26 13:15:53 -0400 |
commit | 3b383767c41be070cae24875789d97b42a3e71a8 (patch) | |
tree | 414a2c3aa750fd25b84f477e2b9cdca17086602a | |
parent | 49e70dda359660f20fa21d03bfae132e15c78195 (diff) | |
parent | 9beba3c54dd180a26a1da2027cfbe9edfaf9c40e (diff) |
Merge branch 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
futex: Add memory barrier commentary to futex_wait_queue_me()
futex: Fix wakeup race by setting TASK_INTERRUPTIBLE before queue_me()
futex: Correct futex_q woken state commentary
futex: Make function kernel-doc commentary consistent
futex: Correct queue_me and unqueue_me commentary
futex: Correct futex_wait_requeue_pi() commentary
-rw-r--r-- | kernel/futex.c | 137 |
1 files changed, 76 insertions, 61 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index 248dd119a86e..b911adceb2c4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -89,36 +89,36 @@ struct futex_pi_state { | |||
89 | union futex_key key; | 89 | union futex_key key; |
90 | }; | 90 | }; |
91 | 91 | ||
92 | /* | 92 | /** |
93 | * We use this hashed waitqueue instead of a normal wait_queue_t, so | 93 | * struct futex_q - The hashed futex queue entry, one per waiting task |
94 | * @task: the task waiting on the futex | ||
95 | * @lock_ptr: the hash bucket lock | ||
96 | * @key: the key the futex is hashed on | ||
97 | * @pi_state: optional priority inheritance state | ||
98 | * @rt_waiter: rt_waiter storage for use with requeue_pi | ||
99 | * @requeue_pi_key: the requeue_pi target futex key | ||
100 | * @bitset: bitset for the optional bitmasked wakeup | ||
101 | * | ||
102 | * We use this hashed waitqueue, instead of a normal wait_queue_t, so | ||
94 | * we can wake only the relevant ones (hashed queues may be shared). | 103 | * we can wake only the relevant ones (hashed queues may be shared). |
95 | * | 104 | * |
96 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. | 105 | * A futex_q has a woken state, just like tasks have TASK_RUNNING. |
97 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. | 106 | * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. |
98 | * The order of wakeup is always to make the first condition true, then | 107 | * The order of wakeup is always to make the first condition true, then |
99 | * wake up q->waiter, then make the second condition true. | 108 | * the second. |
109 | * | ||
110 | * PI futexes are typically woken before they are removed from the hash list via | ||
111 | * the rt_mutex code. See unqueue_me_pi(). | ||
100 | */ | 112 | */ |
101 | struct futex_q { | 113 | struct futex_q { |
102 | struct plist_node list; | 114 | struct plist_node list; |
103 | /* Waiter reference */ | ||
104 | struct task_struct *task; | ||
105 | 115 | ||
106 | /* Which hash list lock to use: */ | 116 | struct task_struct *task; |
107 | spinlock_t *lock_ptr; | 117 | spinlock_t *lock_ptr; |
108 | |||
109 | /* Key which the futex is hashed on: */ | ||
110 | union futex_key key; | 118 | union futex_key key; |
111 | |||
112 | /* Optional priority inheritance state: */ | ||
113 | struct futex_pi_state *pi_state; | 119 | struct futex_pi_state *pi_state; |
114 | |||
115 | /* rt_waiter storage for requeue_pi: */ | ||
116 | struct rt_mutex_waiter *rt_waiter; | 120 | struct rt_mutex_waiter *rt_waiter; |
117 | |||
118 | /* The expected requeue pi target futex key: */ | ||
119 | union futex_key *requeue_pi_key; | 121 | union futex_key *requeue_pi_key; |
120 | |||
121 | /* Bitset for the optional bitmasked wakeup */ | ||
122 | u32 bitset; | 122 | u32 bitset; |
123 | }; | 123 | }; |
124 | 124 | ||
@@ -198,11 +198,12 @@ static void drop_futex_key_refs(union futex_key *key) | |||
198 | } | 198 | } |
199 | 199 | ||
200 | /** | 200 | /** |
201 | * get_futex_key - Get parameters which are the keys for a futex. | 201 | * get_futex_key() - Get parameters which are the keys for a futex |
202 | * @uaddr: virtual address of the futex | 202 | * @uaddr: virtual address of the futex |
203 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED | 203 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED |
204 | * @key: address where result is stored. | 204 | * @key: address where result is stored. |
205 | * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE) | 205 | * @rw: mapping needs to be read/write (values: VERIFY_READ, |
206 | * VERIFY_WRITE) | ||
206 | * | 207 | * |
207 | * Returns a negative error code or 0 | 208 | * Returns a negative error code or 0 |
208 | * The key words are stored in *key on success. | 209 | * The key words are stored in *key on success. |
@@ -288,8 +289,8 @@ void put_futex_key(int fshared, union futex_key *key) | |||
288 | drop_futex_key_refs(key); | 289 | drop_futex_key_refs(key); |
289 | } | 290 | } |
290 | 291 | ||
291 | /* | 292 | /** |
292 | * fault_in_user_writeable - fault in user address and verify RW access | 293 | * fault_in_user_writeable() - Fault in user address and verify RW access |
293 | * @uaddr: pointer to faulting user space address | 294 | * @uaddr: pointer to faulting user space address |
294 | * | 295 | * |
295 | * Slow path to fixup the fault we just took in the atomic write | 296 | * Slow path to fixup the fault we just took in the atomic write |
@@ -309,8 +310,8 @@ static int fault_in_user_writeable(u32 __user *uaddr) | |||
309 | 310 | ||
310 | /** | 311 | /** |
311 | * futex_top_waiter() - Return the highest priority waiter on a futex | 312 | * futex_top_waiter() - Return the highest priority waiter on a futex |
312 | * @hb: the hash bucket the futex_q's reside in | 313 | * @hb: the hash bucket the futex_q's reside in |
313 | * @key: the futex key (to distinguish it from other futex futex_q's) | 314 | * @key: the futex key (to distinguish it from other futex futex_q's) |
314 | * | 315 | * |
315 | * Must be called with the hb lock held. | 316 | * Must be called with the hb lock held. |
316 | */ | 317 | */ |
@@ -588,7 +589,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
588 | } | 589 | } |
589 | 590 | ||
590 | /** | 591 | /** |
591 | * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex | 592 | * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex |
592 | * @uaddr: the pi futex user address | 593 | * @uaddr: the pi futex user address |
593 | * @hb: the pi futex hash bucket | 594 | * @hb: the pi futex hash bucket |
594 | * @key: the futex key associated with uaddr and hb | 595 | * @key: the futex key associated with uaddr and hb |
@@ -1011,9 +1012,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, | |||
1011 | 1012 | ||
1012 | /** | 1013 | /** |
1013 | * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue | 1014 | * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue |
1014 | * q: the futex_q | 1015 | * @q: the futex_q |
1015 | * key: the key of the requeue target futex | 1016 | * @key: the key of the requeue target futex |
1016 | * hb: the hash_bucket of the requeue target futex | 1017 | * @hb: the hash_bucket of the requeue target futex |
1017 | * | 1018 | * |
1018 | * During futex_requeue, with requeue_pi=1, it is possible to acquire the | 1019 | * During futex_requeue, with requeue_pi=1, it is possible to acquire the |
1019 | * target futex if it is uncontended or via a lock steal. Set the futex_q key | 1020 | * target futex if it is uncontended or via a lock steal. Set the futex_q key |
@@ -1350,6 +1351,25 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | |||
1350 | return hb; | 1351 | return hb; |
1351 | } | 1352 | } |
1352 | 1353 | ||
1354 | static inline void | ||
1355 | queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) | ||
1356 | { | ||
1357 | spin_unlock(&hb->lock); | ||
1358 | drop_futex_key_refs(&q->key); | ||
1359 | } | ||
1360 | |||
1361 | /** | ||
1362 | * queue_me() - Enqueue the futex_q on the futex_hash_bucket | ||
1363 | * @q: The futex_q to enqueue | ||
1364 | * @hb: The destination hash bucket | ||
1365 | * | ||
1366 | * The hb->lock must be held by the caller, and is released here. A call to | ||
1367 | * queue_me() is typically paired with exactly one call to unqueue_me(). The | ||
1368 | * exceptions involve the PI related operations, which may use unqueue_me_pi() | ||
1369 | * or nothing if the unqueue is done as part of the wake process and the unqueue | ||
1370 | * state is implicit in the state of the woken task (see futex_wait_requeue_pi() for | ||
1371 | * an example). | ||
1372 | */ | ||
1353 | static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | 1373 | static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) |
1354 | { | 1374 | { |
1355 | int prio; | 1375 | int prio; |
@@ -1373,19 +1393,17 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | |||
1373 | spin_unlock(&hb->lock); | 1393 | spin_unlock(&hb->lock); |
1374 | } | 1394 | } |
1375 | 1395 | ||
1376 | static inline void | 1396 | /** |
1377 | queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) | 1397 | * unqueue_me() - Remove the futex_q from its futex_hash_bucket |
1378 | { | 1398 | * @q: The futex_q to unqueue |
1379 | spin_unlock(&hb->lock); | 1399 | * |
1380 | drop_futex_key_refs(&q->key); | 1400 | * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must |
1381 | } | 1401 | * be paired with exactly one earlier call to queue_me(). |
1382 | 1402 | * | |
1383 | /* | 1403 | * Returns: |
1384 | * queue_me and unqueue_me must be called as a pair, each | 1404 | * 1 - if the futex_q was still queued (and we removed unqueued it) |
1385 | * exactly once. They are called with the hashed spinlock held. | 1405 | * 0 - if the futex_q was already removed by the waking thread |
1386 | */ | 1406 | */ |
1387 | |||
1388 | /* Return 1 if we were still queued (ie. 0 means we were woken) */ | ||
1389 | static int unqueue_me(struct futex_q *q) | 1407 | static int unqueue_me(struct futex_q *q) |
1390 | { | 1408 | { |
1391 | spinlock_t *lock_ptr; | 1409 | spinlock_t *lock_ptr; |
@@ -1638,17 +1656,14 @@ out: | |||
1638 | static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | 1656 | static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, |
1639 | struct hrtimer_sleeper *timeout) | 1657 | struct hrtimer_sleeper *timeout) |
1640 | { | 1658 | { |
1641 | queue_me(q, hb); | ||
1642 | |||
1643 | /* | 1659 | /* |
1644 | * There might have been scheduling since the queue_me(), as we | 1660 | * The task state is guaranteed to be set before another task can |
1645 | * cannot hold a spinlock across the get_user() in case it | 1661 | * wake it. set_current_state() is implemented using set_mb() and |
1646 | * faults, and we cannot just set TASK_INTERRUPTIBLE state when | 1662 | * queue_me() calls spin_unlock() upon completion, both serializing |
1647 | * queueing ourselves into the futex hash. This code thus has to | 1663 | * access to the hash list and forcing another memory barrier. |
1648 | * rely on the futex_wake() code removing us from hash when it | ||
1649 | * wakes us up. | ||
1650 | */ | 1664 | */ |
1651 | set_current_state(TASK_INTERRUPTIBLE); | 1665 | set_current_state(TASK_INTERRUPTIBLE); |
1666 | queue_me(q, hb); | ||
1652 | 1667 | ||
1653 | /* Arm the timer */ | 1668 | /* Arm the timer */ |
1654 | if (timeout) { | 1669 | if (timeout) { |
@@ -1658,8 +1673,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | |||
1658 | } | 1673 | } |
1659 | 1674 | ||
1660 | /* | 1675 | /* |
1661 | * !plist_node_empty() is safe here without any lock. | 1676 | * If we have been removed from the hash list, then another task |
1662 | * q.lock_ptr != 0 is not safe, because of ordering against wakeup. | 1677 | * has tried to wake us, and we can skip the call to schedule(). |
1663 | */ | 1678 | */ |
1664 | if (likely(!plist_node_empty(&q->list))) { | 1679 | if (likely(!plist_node_empty(&q->list))) { |
1665 | /* | 1680 | /* |
@@ -2114,12 +2129,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
2114 | 2129 | ||
2115 | /** | 2130 | /** |
2116 | * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 | 2131 | * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 |
2117 | * @uaddr: the futex we initialyl wait on (non-pi) | 2132 | * @uaddr: the futex we initially wait on (non-pi) |
2118 | * @fshared: whether the futexes are shared (1) or not (0). They must be | 2133 | * @fshared: whether the futexes are shared (1) or not (0). They must be |
2119 | * the same type, no requeueing from private to shared, etc. | 2134 | * the same type, no requeueing from private to shared, etc. |
2120 | * @val: the expected value of uaddr | 2135 | * @val: the expected value of uaddr |
2121 | * @abs_time: absolute timeout | 2136 | * @abs_time: absolute timeout |
2122 | * @bitset: 32 bit wakeup bitset set by userspace, defaults to all. | 2137 | * @bitset: 32 bit wakeup bitset set by userspace, defaults to all |
2123 | * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) | 2138 | * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) |
2124 | * @uaddr2: the pi futex we will take prior to returning to user-space | 2139 | * @uaddr2: the pi futex we will take prior to returning to user-space |
2125 | * | 2140 | * |
@@ -2246,7 +2261,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2246 | res = fixup_owner(uaddr2, fshared, &q, !ret); | 2261 | res = fixup_owner(uaddr2, fshared, &q, !ret); |
2247 | /* | 2262 | /* |
2248 | * If fixup_owner() returned an error, propagate that. If it | 2263 | * If fixup_owner() returned an error, propagate that. If it |
2249 | * acquired the lock, clear our -ETIMEDOUT or -EINTR. | 2264 | * acquired the lock, clear -ETIMEDOUT or -EINTR. |
2250 | */ | 2265 | */ |
2251 | if (res) | 2266 | if (res) |
2252 | ret = (res < 0) ? res : 0; | 2267 | ret = (res < 0) ? res : 0; |
@@ -2302,9 +2317,9 @@ out: | |||
2302 | */ | 2317 | */ |
2303 | 2318 | ||
2304 | /** | 2319 | /** |
2305 | * sys_set_robust_list - set the robust-futex list head of a task | 2320 | * sys_set_robust_list() - Set the robust-futex list head of a task |
2306 | * @head: pointer to the list-head | 2321 | * @head: pointer to the list-head |
2307 | * @len: length of the list-head, as userspace expects | 2322 | * @len: length of the list-head, as userspace expects |
2308 | */ | 2323 | */ |
2309 | SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, | 2324 | SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, |
2310 | size_t, len) | 2325 | size_t, len) |
@@ -2323,10 +2338,10 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, | |||
2323 | } | 2338 | } |
2324 | 2339 | ||
2325 | /** | 2340 | /** |
2326 | * sys_get_robust_list - get the robust-futex list head of a task | 2341 | * sys_get_robust_list() - Get the robust-futex list head of a task |
2327 | * @pid: pid of the process [zero for current task] | 2342 | * @pid: pid of the process [zero for current task] |
2328 | * @head_ptr: pointer to a list-head pointer, the kernel fills it in | 2343 | * @head_ptr: pointer to a list-head pointer, the kernel fills it in |
2329 | * @len_ptr: pointer to a length field, the kernel fills in the header size | 2344 | * @len_ptr: pointer to a length field, the kernel fills in the header size |
2330 | */ | 2345 | */ |
2331 | SYSCALL_DEFINE3(get_robust_list, int, pid, | 2346 | SYSCALL_DEFINE3(get_robust_list, int, pid, |
2332 | struct robust_list_head __user * __user *, head_ptr, | 2347 | struct robust_list_head __user * __user *, head_ptr, |