about summary refs log tree commit diff stats
path: root/kernel/futex.c
diff options
context:
space:
mode:
authorHaavard Skinnemoen <haavard.skinnemoen@atmel.com>2009-10-06 11:36:55 -0400
committerHaavard Skinnemoen <haavard.skinnemoen@atmel.com>2009-10-06 11:36:55 -0400
commitd94e5fcbf1420366dcb4102bafe04dbcfc0d0d4b (patch)
treea9b7de7df6da5c3132cc68169b9c47ba288ccd42 /kernel/futex.c
parentd55651168a20078a94597a297d5cdfd807bf07b6 (diff)
parent374576a8b6f865022c0fd1ca62396889b23d66dd (diff)
Merge commit 'v2.6.32-rc3'
Diffstat (limited to 'kernel/futex.c')
-rw-r--r--kernel/futex.c180
1 file changed, 104 insertions(+), 76 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index e18cfbdc7190..b911adceb2c4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -89,33 +89,36 @@ struct futex_pi_state {
89 union futex_key key; 89 union futex_key key;
90}; 90};
91 91
92/* 92/**
93 * We use this hashed waitqueue instead of a normal wait_queue_t, so 93 * struct futex_q - The hashed futex queue entry, one per waiting task
94 * @task: the task waiting on the futex
95 * @lock_ptr: the hash bucket lock
96 * @key: the key the futex is hashed on
97 * @pi_state: optional priority inheritance state
98 * @rt_waiter: rt_waiter storage for use with requeue_pi
99 * @requeue_pi_key: the requeue_pi target futex key
100 * @bitset: bitset for the optional bitmasked wakeup
101 *
102 * We use this hashed waitqueue, instead of a normal wait_queue_t, so
94 * we can wake only the relevant ones (hashed queues may be shared). 103 * we can wake only the relevant ones (hashed queues may be shared).
95 * 104 *
96 * A futex_q has a woken state, just like tasks have TASK_RUNNING. 105 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
97 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. 106 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
98 * The order of wakeup is always to make the first condition true, then 107 * The order of wakeup is always to make the first condition true, then
99 * wake up q->waiter, then make the second condition true. 108 * the second.
109 *
110 * PI futexes are typically woken before they are removed from the hash list via
111 * the rt_mutex code. See unqueue_me_pi().
100 */ 112 */
101struct futex_q { 113struct futex_q {
102 struct plist_node list; 114 struct plist_node list;
103 /* Waiter reference */
104 struct task_struct *task;
105 115
106 /* Which hash list lock to use: */ 116 struct task_struct *task;
107 spinlock_t *lock_ptr; 117 spinlock_t *lock_ptr;
108
109 /* Key which the futex is hashed on: */
110 union futex_key key; 118 union futex_key key;
111
112 /* Optional priority inheritance state: */
113 struct futex_pi_state *pi_state; 119 struct futex_pi_state *pi_state;
114
115 /* rt_waiter storage for requeue_pi: */
116 struct rt_mutex_waiter *rt_waiter; 120 struct rt_mutex_waiter *rt_waiter;
117 121 union futex_key *requeue_pi_key;
118 /* Bitset for the optional bitmasked wakeup */
119 u32 bitset; 122 u32 bitset;
120}; 123};
121 124
@@ -195,11 +198,12 @@ static void drop_futex_key_refs(union futex_key *key)
195} 198}
196 199
197/** 200/**
198 * get_futex_key - Get parameters which are the keys for a futex. 201 * get_futex_key() - Get parameters which are the keys for a futex
199 * @uaddr: virtual address of the futex 202 * @uaddr: virtual address of the futex
200 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED 203 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
201 * @key: address where result is stored. 204 * @key: address where result is stored.
202 * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE) 205 * @rw: mapping needs to be read/write (values: VERIFY_READ,
206 * VERIFY_WRITE)
203 * 207 *
204 * Returns a negative error code or 0 208 * Returns a negative error code or 0
205 * The key words are stored in *key on success. 209 * The key words are stored in *key on success.
@@ -285,8 +289,8 @@ void put_futex_key(int fshared, union futex_key *key)
285 drop_futex_key_refs(key); 289 drop_futex_key_refs(key);
286} 290}
287 291
288/* 292/**
289 * fault_in_user_writeable - fault in user address and verify RW access 293 * fault_in_user_writeable() - Fault in user address and verify RW access
290 * @uaddr: pointer to faulting user space address 294 * @uaddr: pointer to faulting user space address
291 * 295 *
292 * Slow path to fixup the fault we just took in the atomic write 296 * Slow path to fixup the fault we just took in the atomic write
@@ -306,8 +310,8 @@ static int fault_in_user_writeable(u32 __user *uaddr)
306 310
307/** 311/**
308 * futex_top_waiter() - Return the highest priority waiter on a futex 312 * futex_top_waiter() - Return the highest priority waiter on a futex
309 * @hb: the hash bucket the futex_q's reside in 313 * @hb: the hash bucket the futex_q's reside in
310 * @key: the futex key (to distinguish it from other futex futex_q's) 314 * @key: the futex key (to distinguish it from other futex futex_q's)
311 * 315 *
312 * Must be called with the hb lock held. 316 * Must be called with the hb lock held.
313 */ 317 */
@@ -585,7 +589,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
585} 589}
586 590
587/** 591/**
588 * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex 592 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
589 * @uaddr: the pi futex user address 593 * @uaddr: the pi futex user address
590 * @hb: the pi futex hash bucket 594 * @hb: the pi futex hash bucket
591 * @key: the futex key associated with uaddr and hb 595 * @key: the futex key associated with uaddr and hb
@@ -1008,9 +1012,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1008 1012
1009/** 1013/**
1010 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue 1014 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
1011 * q: the futex_q 1015 * @q: the futex_q
1012 * key: the key of the requeue target futex 1016 * @key: the key of the requeue target futex
1013 * hb: the hash_bucket of the requeue target futex 1017 * @hb: the hash_bucket of the requeue target futex
1014 * 1018 *
1015 * During futex_requeue, with requeue_pi=1, it is possible to acquire the 1019 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
1016 * target futex if it is uncontended or via a lock steal. Set the futex_q key 1020 * target futex if it is uncontended or via a lock steal. Set the futex_q key
@@ -1089,6 +1093,10 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1089 if (!top_waiter) 1093 if (!top_waiter)
1090 return 0; 1094 return 0;
1091 1095
1096 /* Ensure we requeue to the expected futex. */
1097 if (!match_futex(top_waiter->requeue_pi_key, key2))
1098 return -EINVAL;
1099
1092 /* 1100 /*
1093 * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in 1101 * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in
1094 * the contended case or if set_waiters is 1. The pi_state is returned 1102 * the contended case or if set_waiters is 1. The pi_state is returned
@@ -1276,6 +1284,12 @@ retry_private:
1276 continue; 1284 continue;
1277 } 1285 }
1278 1286
1287 /* Ensure we requeue to the expected futex for requeue_pi. */
1288 if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
1289 ret = -EINVAL;
1290 break;
1291 }
1292
1279 /* 1293 /*
1280 * Requeue nr_requeue waiters and possibly one more in the case 1294 * Requeue nr_requeue waiters and possibly one more in the case
1281 * of requeue_pi if we couldn't acquire the lock atomically. 1295 * of requeue_pi if we couldn't acquire the lock atomically.
@@ -1337,6 +1351,25 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1337 return hb; 1351 return hb;
1338} 1352}
1339 1353
1354static inline void
1355queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
1356{
1357 spin_unlock(&hb->lock);
1358 drop_futex_key_refs(&q->key);
1359}
1360
1361/**
1362 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
1363 * @q: The futex_q to enqueue
1364 * @hb: The destination hash bucket
1365 *
1366 * The hb->lock must be held by the caller, and is released here. A call to
1367 * queue_me() is typically paired with exactly one call to unqueue_me(). The
1368 * exceptions involve the PI related operations, which may use unqueue_me_pi()
1369 * or nothing if the unqueue is done as part of the wake process and the unqueue
1370 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
1371 * an example).
1372 */
1340static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) 1373static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1341{ 1374{
1342 int prio; 1375 int prio;
@@ -1360,19 +1393,17 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1360 spin_unlock(&hb->lock); 1393 spin_unlock(&hb->lock);
1361} 1394}
1362 1395
1363static inline void 1396/**
1364queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) 1397 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
1365{ 1398 * @q: The futex_q to unqueue
1366 spin_unlock(&hb->lock); 1399 *
1367 drop_futex_key_refs(&q->key); 1400 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
1368} 1401 * be paired with exactly one earlier call to queue_me().
1369 1402 *
1370/* 1403 * Returns:
1371 * queue_me and unqueue_me must be called as a pair, each 1404 * 1 - if the futex_q was still queued (and we removed unqueued it)
1372 * exactly once. They are called with the hashed spinlock held. 1405 * 0 - if the futex_q was already removed by the waking thread
1373 */ 1406 */
1374
1375/* Return 1 if we were still queued (ie. 0 means we were woken) */
1376static int unqueue_me(struct futex_q *q) 1407static int unqueue_me(struct futex_q *q)
1377{ 1408{
1378 spinlock_t *lock_ptr; 1409 spinlock_t *lock_ptr;
@@ -1625,17 +1656,14 @@ out:
1625static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, 1656static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1626 struct hrtimer_sleeper *timeout) 1657 struct hrtimer_sleeper *timeout)
1627{ 1658{
1628 queue_me(q, hb);
1629
1630 /* 1659 /*
1631 * There might have been scheduling since the queue_me(), as we 1660 * The task state is guaranteed to be set before another task can
1632 * cannot hold a spinlock across the get_user() in case it 1661 * wake it. set_current_state() is implemented using set_mb() and
1633 * faults, and we cannot just set TASK_INTERRUPTIBLE state when 1662 * queue_me() calls spin_unlock() upon completion, both serializing
1634 * queueing ourselves into the futex hash. This code thus has to 1663 * access to the hash list and forcing another memory barrier.
1635 * rely on the futex_wake() code removing us from hash when it
1636 * wakes us up.
1637 */ 1664 */
1638 set_current_state(TASK_INTERRUPTIBLE); 1665 set_current_state(TASK_INTERRUPTIBLE);
1666 queue_me(q, hb);
1639 1667
1640 /* Arm the timer */ 1668 /* Arm the timer */
1641 if (timeout) { 1669 if (timeout) {
@@ -1645,8 +1673,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1645 } 1673 }
1646 1674
1647 /* 1675 /*
1648 * !plist_node_empty() is safe here without any lock. 1676 * If we have been removed from the hash list, then another task
1649 * q.lock_ptr != 0 is not safe, because of ordering against wakeup. 1677 * has tried to wake us, and we can skip the call to schedule().
1650 */ 1678 */
1651 if (likely(!plist_node_empty(&q->list))) { 1679 if (likely(!plist_node_empty(&q->list))) {
1652 /* 1680 /*
@@ -1751,6 +1779,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
1751 q.pi_state = NULL; 1779 q.pi_state = NULL;
1752 q.bitset = bitset; 1780 q.bitset = bitset;
1753 q.rt_waiter = NULL; 1781 q.rt_waiter = NULL;
1782 q.requeue_pi_key = NULL;
1754 1783
1755 if (abs_time) { 1784 if (abs_time) {
1756 to = &timeout; 1785 to = &timeout;
@@ -1858,6 +1887,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
1858 1887
1859 q.pi_state = NULL; 1888 q.pi_state = NULL;
1860 q.rt_waiter = NULL; 1889 q.rt_waiter = NULL;
1890 q.requeue_pi_key = NULL;
1861retry: 1891retry:
1862 q.key = FUTEX_KEY_INIT; 1892 q.key = FUTEX_KEY_INIT;
1863 ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); 1893 ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
@@ -2099,12 +2129,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2099 2129
2100/** 2130/**
2101 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 2131 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
2102 * @uaddr: the futex we initialyl wait on (non-pi) 2132 * @uaddr: the futex we initially wait on (non-pi)
2103 * @fshared: whether the futexes are shared (1) or not (0). They must be 2133 * @fshared: whether the futexes are shared (1) or not (0). They must be
2104 * the same type, no requeueing from private to shared, etc. 2134 * the same type, no requeueing from private to shared, etc.
2105 * @val: the expected value of uaddr 2135 * @val: the expected value of uaddr
2106 * @abs_time: absolute timeout 2136 * @abs_time: absolute timeout
2107 * @bitset: 32 bit wakeup bitset set by userspace, defaults to all. 2137 * @bitset: 32 bit wakeup bitset set by userspace, defaults to all
2108 * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) 2138 * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
2109 * @uaddr2: the pi futex we will take prior to returning to user-space 2139 * @uaddr2: the pi futex we will take prior to returning to user-space
2110 * 2140 *
@@ -2118,11 +2148,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2118 * We call schedule in futex_wait_queue_me() when we enqueue and return there 2148 * We call schedule in futex_wait_queue_me() when we enqueue and return there
2119 * via the following: 2149 * via the following:
2120 * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() 2150 * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
2121 * 2) wakeup on uaddr2 after a requeue and subsequent unlock 2151 * 2) wakeup on uaddr2 after a requeue
2122 * 3) signal (before or after requeue) 2152 * 3) signal
2123 * 4) timeout (before or after requeue) 2153 * 4) timeout
2124 * 2154 *
2125 * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function. 2155 * If 3, cleanup and return -ERESTARTNOINTR.
2126 * 2156 *
2127 * If 2, we may then block on trying to take the rt_mutex and return via: 2157 * If 2, we may then block on trying to take the rt_mutex and return via:
2128 * 5) successful lock 2158 * 5) successful lock
@@ -2130,7 +2160,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2130 * 7) timeout 2160 * 7) timeout
2131 * 8) other lock acquisition failure 2161 * 8) other lock acquisition failure
2132 * 2162 *
2133 * If 6, we setup a restart_block with futex_lock_pi() as the function. 2163 * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
2134 * 2164 *
2135 * If 4 or 7, we cleanup and return with -ETIMEDOUT. 2165 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
2136 * 2166 *
@@ -2169,15 +2199,16 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2169 debug_rt_mutex_init_waiter(&rt_waiter); 2199 debug_rt_mutex_init_waiter(&rt_waiter);
2170 rt_waiter.task = NULL; 2200 rt_waiter.task = NULL;
2171 2201
2172 q.pi_state = NULL;
2173 q.bitset = bitset;
2174 q.rt_waiter = &rt_waiter;
2175
2176 key2 = FUTEX_KEY_INIT; 2202 key2 = FUTEX_KEY_INIT;
2177 ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); 2203 ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
2178 if (unlikely(ret != 0)) 2204 if (unlikely(ret != 0))
2179 goto out; 2205 goto out;
2180 2206
2207 q.pi_state = NULL;
2208 q.bitset = bitset;
2209 q.rt_waiter = &rt_waiter;
2210 q.requeue_pi_key = &key2;
2211
2181 /* Prepare to wait on uaddr. */ 2212 /* Prepare to wait on uaddr. */
2182 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); 2213 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
2183 if (ret) 2214 if (ret)
@@ -2230,7 +2261,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2230 res = fixup_owner(uaddr2, fshared, &q, !ret); 2261 res = fixup_owner(uaddr2, fshared, &q, !ret);
2231 /* 2262 /*
2232 * If fixup_owner() returned an error, propagate that. If it 2263 * If fixup_owner() returned an error, propagate that. If it
2233 * acquired the lock, clear our -ETIMEDOUT or -EINTR. 2264 * acquired the lock, clear -ETIMEDOUT or -EINTR.
2234 */ 2265 */
2235 if (res) 2266 if (res)
2236 ret = (res < 0) ? res : 0; 2267 ret = (res < 0) ? res : 0;
@@ -2248,14 +2279,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2248 rt_mutex_unlock(pi_mutex); 2279 rt_mutex_unlock(pi_mutex);
2249 } else if (ret == -EINTR) { 2280 } else if (ret == -EINTR) {
2250 /* 2281 /*
2251 * We've already been requeued, but we have no way to 2282 * We've already been requeued, but cannot restart by calling
2252 * restart by calling futex_lock_pi() directly. We 2283 * futex_lock_pi() directly. We could restart this syscall, but
2253 * could restart the syscall, but that will look at 2284 * it would detect that the user space "val" changed and return
2254 * the user space value and return right away. So we 2285 * -EWOULDBLOCK. Save the overhead of the restart and return
2255 * drop back with EWOULDBLOCK to tell user space that 2286 * -EWOULDBLOCK directly.
2256 * "val" has been changed. That's the same what the
2257 * restart of the syscall would do in
2258 * futex_wait_setup().
2259 */ 2287 */
2260 ret = -EWOULDBLOCK; 2288 ret = -EWOULDBLOCK;
2261 } 2289 }
@@ -2289,9 +2317,9 @@ out:
2289 */ 2317 */
2290 2318
2291/** 2319/**
2292 * sys_set_robust_list - set the robust-futex list head of a task 2320 * sys_set_robust_list() - Set the robust-futex list head of a task
2293 * @head: pointer to the list-head 2321 * @head: pointer to the list-head
2294 * @len: length of the list-head, as userspace expects 2322 * @len: length of the list-head, as userspace expects
2295 */ 2323 */
2296SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, 2324SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2297 size_t, len) 2325 size_t, len)
@@ -2310,10 +2338,10 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2310} 2338}
2311 2339
2312/** 2340/**
2313 * sys_get_robust_list - get the robust-futex list head of a task 2341 * sys_get_robust_list() - Get the robust-futex list head of a task
2314 * @pid: pid of the process [zero for current task] 2342 * @pid: pid of the process [zero for current task]
2315 * @head_ptr: pointer to a list-head pointer, the kernel fills it in 2343 * @head_ptr: pointer to a list-head pointer, the kernel fills it in
2316 * @len_ptr: pointer to a length field, the kernel fills in the header size 2344 * @len_ptr: pointer to a length field, the kernel fills in the header size
2317 */ 2345 */
2318SYSCALL_DEFINE3(get_robust_list, int, pid, 2346SYSCALL_DEFINE3(get_robust_list, int, pid,
2319 struct robust_list_head __user * __user *, head_ptr, 2347 struct robust_list_head __user * __user *, head_ptr,