about summary refs log tree commit diff stats
path: root/kernel/futex.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- kernel/futex.c 160
1 files changed, 92 insertions, 68 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 248dd119a86e..fb65e822fc41 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -89,36 +89,36 @@ struct futex_pi_state {
89 union futex_key key; 89 union futex_key key;
90}; 90};
91 91
92/* 92/**
93 * We use this hashed waitqueue instead of a normal wait_queue_t, so 93 * struct futex_q - The hashed futex queue entry, one per waiting task
94 * @task: the task waiting on the futex
95 * @lock_ptr: the hash bucket lock
96 * @key: the key the futex is hashed on
97 * @pi_state: optional priority inheritance state
98 * @rt_waiter: rt_waiter storage for use with requeue_pi
99 * @requeue_pi_key: the requeue_pi target futex key
100 * @bitset: bitset for the optional bitmasked wakeup
101 *
102 * We use this hashed waitqueue, instead of a normal wait_queue_t, so
94 * we can wake only the relevant ones (hashed queues may be shared). 103 * we can wake only the relevant ones (hashed queues may be shared).
95 * 104 *
96 * A futex_q has a woken state, just like tasks have TASK_RUNNING. 105 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
97 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. 106 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
98 * The order of wakeup is always to make the first condition true, then 107 * The order of wakeup is always to make the first condition true, then
99 * wake up q->waiter, then make the second condition true. 108 * the second.
109 *
110 * PI futexes are typically woken before they are removed from the hash list via
111 * the rt_mutex code. See unqueue_me_pi().
100 */ 112 */
101struct futex_q { 113struct futex_q {
102 struct plist_node list; 114 struct plist_node list;
103 /* Waiter reference */
104 struct task_struct *task;
105 115
106 /* Which hash list lock to use: */ 116 struct task_struct *task;
107 spinlock_t *lock_ptr; 117 spinlock_t *lock_ptr;
108
109 /* Key which the futex is hashed on: */
110 union futex_key key; 118 union futex_key key;
111
112 /* Optional priority inheritance state: */
113 struct futex_pi_state *pi_state; 119 struct futex_pi_state *pi_state;
114
115 /* rt_waiter storage for requeue_pi: */
116 struct rt_mutex_waiter *rt_waiter; 120 struct rt_mutex_waiter *rt_waiter;
117
118 /* The expected requeue pi target futex key: */
119 union futex_key *requeue_pi_key; 121 union futex_key *requeue_pi_key;
120
121 /* Bitset for the optional bitmasked wakeup */
122 u32 bitset; 122 u32 bitset;
123}; 123};
124 124
@@ -150,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key)
150 */ 150 */
151static inline int match_futex(union futex_key *key1, union futex_key *key2) 151static inline int match_futex(union futex_key *key1, union futex_key *key2)
152{ 152{
153 return (key1->both.word == key2->both.word 153 return (key1 && key2
154 && key1->both.word == key2->both.word
154 && key1->both.ptr == key2->both.ptr 155 && key1->both.ptr == key2->both.ptr
155 && key1->both.offset == key2->both.offset); 156 && key1->both.offset == key2->both.offset);
156} 157}
@@ -198,11 +199,12 @@ static void drop_futex_key_refs(union futex_key *key)
198} 199}
199 200
200/** 201/**
201 * get_futex_key - Get parameters which are the keys for a futex. 202 * get_futex_key() - Get parameters which are the keys for a futex
202 * @uaddr: virtual address of the futex 203 * @uaddr: virtual address of the futex
203 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED 204 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
204 * @key: address where result is stored. 205 * @key: address where result is stored.
205 * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE) 206 * @rw: mapping needs to be read/write (values: VERIFY_READ,
207 * VERIFY_WRITE)
206 * 208 *
207 * Returns a negative error code or 0 209 * Returns a negative error code or 0
208 * The key words are stored in *key on success. 210 * The key words are stored in *key on success.
@@ -288,8 +290,8 @@ void put_futex_key(int fshared, union futex_key *key)
288 drop_futex_key_refs(key); 290 drop_futex_key_refs(key);
289} 291}
290 292
291/* 293/**
292 * fault_in_user_writeable - fault in user address and verify RW access 294 * fault_in_user_writeable() - Fault in user address and verify RW access
293 * @uaddr: pointer to faulting user space address 295 * @uaddr: pointer to faulting user space address
294 * 296 *
295 * Slow path to fixup the fault we just took in the atomic write 297 * Slow path to fixup the fault we just took in the atomic write
@@ -309,8 +311,8 @@ static int fault_in_user_writeable(u32 __user *uaddr)
309 311
310/** 312/**
311 * futex_top_waiter() - Return the highest priority waiter on a futex 313 * futex_top_waiter() - Return the highest priority waiter on a futex
312 * @hb: the hash bucket the futex_q's reside in 314 * @hb: the hash bucket the futex_q's reside in
313 * @key: the futex key (to distinguish it from other futex futex_q's) 315 * @key: the futex key (to distinguish it from other futex futex_q's)
314 * 316 *
315 * Must be called with the hb lock held. 317 * Must be called with the hb lock held.
316 */ 318 */
@@ -588,7 +590,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
588} 590}
589 591
590/** 592/**
591 * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex 593 * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
592 * @uaddr: the pi futex user address 594 * @uaddr: the pi futex user address
593 * @hb: the pi futex hash bucket 595 * @hb: the pi futex hash bucket
594 * @key: the futex key associated with uaddr and hb 596 * @key: the futex key associated with uaddr and hb
@@ -915,8 +917,8 @@ retry:
915 hb1 = hash_futex(&key1); 917 hb1 = hash_futex(&key1);
916 hb2 = hash_futex(&key2); 918 hb2 = hash_futex(&key2);
917 919
918 double_lock_hb(hb1, hb2);
919retry_private: 920retry_private:
921 double_lock_hb(hb1, hb2);
920 op_ret = futex_atomic_op_inuser(op, uaddr2); 922 op_ret = futex_atomic_op_inuser(op, uaddr2);
921 if (unlikely(op_ret < 0)) { 923 if (unlikely(op_ret < 0)) {
922 924
@@ -1011,9 +1013,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1011 1013
1012/** 1014/**
1013 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue 1015 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
1014 * q: the futex_q 1016 * @q: the futex_q
1015 * key: the key of the requeue target futex 1017 * @key: the key of the requeue target futex
1016 * hb: the hash_bucket of the requeue target futex 1018 * @hb: the hash_bucket of the requeue target futex
1017 * 1019 *
1018 * During futex_requeue, with requeue_pi=1, it is possible to acquire the 1020 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
1019 * target futex if it is uncontended or via a lock steal. Set the futex_q key 1021 * target futex if it is uncontended or via a lock steal. Set the futex_q key
@@ -1027,7 +1029,6 @@ static inline
1027void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, 1029void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1028 struct futex_hash_bucket *hb) 1030 struct futex_hash_bucket *hb)
1029{ 1031{
1030 drop_futex_key_refs(&q->key);
1031 get_futex_key_refs(key); 1032 get_futex_key_refs(key);
1032 q->key = *key; 1033 q->key = *key;
1033 1034
@@ -1225,6 +1226,7 @@ retry_private:
1225 */ 1226 */
1226 if (ret == 1) { 1227 if (ret == 1) {
1227 WARN_ON(pi_state); 1228 WARN_ON(pi_state);
1229 drop_count++;
1228 task_count++; 1230 task_count++;
1229 ret = get_futex_value_locked(&curval2, uaddr2); 1231 ret = get_futex_value_locked(&curval2, uaddr2);
1230 if (!ret) 1232 if (!ret)
@@ -1303,6 +1305,7 @@ retry_private:
1303 if (ret == 1) { 1305 if (ret == 1) {
1304 /* We got the lock. */ 1306 /* We got the lock. */
1305 requeue_pi_wake_futex(this, &key2, hb2); 1307 requeue_pi_wake_futex(this, &key2, hb2);
1308 drop_count++;
1306 continue; 1309 continue;
1307 } else if (ret) { 1310 } else if (ret) {
1308 /* -EDEADLK */ 1311 /* -EDEADLK */
@@ -1350,6 +1353,25 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1350 return hb; 1353 return hb;
1351} 1354}
1352 1355
1356static inline void
1357queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
1358{
1359 spin_unlock(&hb->lock);
1360 drop_futex_key_refs(&q->key);
1361}
1362
1363/**
1364 * queue_me() - Enqueue the futex_q on the futex_hash_bucket
1365 * @q: The futex_q to enqueue
1366 * @hb: The destination hash bucket
1367 *
1368 * The hb->lock must be held by the caller, and is released here. A call to
1369 * queue_me() is typically paired with exactly one call to unqueue_me(). The
1370 * exceptions involve the PI related operations, which may use unqueue_me_pi()
1371 * or nothing if the unqueue is done as part of the wake process and the unqueue
1372 * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
1373 * an example).
1374 */
1353static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) 1375static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1354{ 1376{
1355 int prio; 1377 int prio;
@@ -1373,19 +1395,17 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1373 spin_unlock(&hb->lock); 1395 spin_unlock(&hb->lock);
1374} 1396}
1375 1397
1376static inline void 1398/**
1377queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) 1399 * unqueue_me() - Remove the futex_q from its futex_hash_bucket
1378{ 1400 * @q: The futex_q to unqueue
1379 spin_unlock(&hb->lock); 1401 *
1380 drop_futex_key_refs(&q->key); 1402 * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
1381} 1403 * be paired with exactly one earlier call to queue_me().
1382 1404 *
1383/* 1405 * Returns:
1384 * queue_me and unqueue_me must be called as a pair, each 1406 * 1 - if the futex_q was still queued (and we unqueued it)
1385 * exactly once. They are called with the hashed spinlock held. 1407 * 0 - if the futex_q was already removed by the waking thread
1386 */ 1408 */
1387
1388/* Return 1 if we were still queued (ie. 0 means we were woken) */
1389static int unqueue_me(struct futex_q *q) 1409static int unqueue_me(struct futex_q *q)
1390{ 1410{
1391 spinlock_t *lock_ptr; 1411 spinlock_t *lock_ptr;
@@ -1638,17 +1658,14 @@ out:
1638static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, 1658static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1639 struct hrtimer_sleeper *timeout) 1659 struct hrtimer_sleeper *timeout)
1640{ 1660{
1641 queue_me(q, hb);
1642
1643 /* 1661 /*
1644 * There might have been scheduling since the queue_me(), as we 1662 * The task state is guaranteed to be set before another task can
1645 * cannot hold a spinlock across the get_user() in case it 1663 * wake it. set_current_state() is implemented using set_mb() and
1646 * faults, and we cannot just set TASK_INTERRUPTIBLE state when 1664 * queue_me() calls spin_unlock() upon completion, both serializing
1647 * queueing ourselves into the futex hash. This code thus has to 1665 * access to the hash list and forcing another memory barrier.
1648 * rely on the futex_wake() code removing us from hash when it
1649 * wakes us up.
1650 */ 1666 */
1651 set_current_state(TASK_INTERRUPTIBLE); 1667 set_current_state(TASK_INTERRUPTIBLE);
1668 queue_me(q, hb);
1652 1669
1653 /* Arm the timer */ 1670 /* Arm the timer */
1654 if (timeout) { 1671 if (timeout) {
@@ -1658,8 +1675,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1658 } 1675 }
1659 1676
1660 /* 1677 /*
1661 * !plist_node_empty() is safe here without any lock. 1678 * If we have been removed from the hash list, then another task
1662 * q.lock_ptr != 0 is not safe, because of ordering against wakeup. 1679 * has tried to wake us, and we can skip the call to schedule().
1663 */ 1680 */
1664 if (likely(!plist_node_empty(&q->list))) { 1681 if (likely(!plist_node_empty(&q->list))) {
1665 /* 1682 /*
@@ -1776,6 +1793,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
1776 current->timer_slack_ns); 1793 current->timer_slack_ns);
1777 } 1794 }
1778 1795
1796retry:
1779 /* Prepare to wait on uaddr. */ 1797 /* Prepare to wait on uaddr. */
1780 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); 1798 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
1781 if (ret) 1799 if (ret)
@@ -1793,9 +1811,14 @@ static int futex_wait(u32 __user *uaddr, int fshared,
1793 goto out_put_key; 1811 goto out_put_key;
1794 1812
1795 /* 1813 /*
1796 * We expect signal_pending(current), but another thread may 1814 * We expect signal_pending(current), but we might be the
1797 * have handled it for us already. 1815 * victim of a spurious wakeup as well.
1798 */ 1816 */
1817 if (!signal_pending(current)) {
1818 put_futex_key(fshared, &q.key);
1819 goto retry;
1820 }
1821
1799 ret = -ERESTARTSYS; 1822 ret = -ERESTARTSYS;
1800 if (!abs_time) 1823 if (!abs_time)
1801 goto out_put_key; 1824 goto out_put_key;
@@ -2102,11 +2125,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2102 * Unqueue the futex_q and determine which it was. 2125 * Unqueue the futex_q and determine which it was.
2103 */ 2126 */
2104 plist_del(&q->list, &q->list.plist); 2127 plist_del(&q->list, &q->list.plist);
2105 drop_futex_key_refs(&q->key);
2106 2128
2129 /* Handle spurious wakeups gracefully */
2130 ret = -EWOULDBLOCK;
2107 if (timeout && !timeout->task) 2131 if (timeout && !timeout->task)
2108 ret = -ETIMEDOUT; 2132 ret = -ETIMEDOUT;
2109 else 2133 else if (signal_pending(current))
2110 ret = -ERESTARTNOINTR; 2134 ret = -ERESTARTNOINTR;
2111 } 2135 }
2112 return ret; 2136 return ret;
@@ -2114,12 +2138,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2114 2138
2115/** 2139/**
2116 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 2140 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
2117 * @uaddr: the futex we initialyl wait on (non-pi) 2141 * @uaddr: the futex we initially wait on (non-pi)
2118 * @fshared: whether the futexes are shared (1) or not (0). They must be 2142 * @fshared: whether the futexes are shared (1) or not (0). They must be
2119 * the same type, no requeueing from private to shared, etc. 2143 * the same type, no requeueing from private to shared, etc.
2120 * @val: the expected value of uaddr 2144 * @val: the expected value of uaddr
2121 * @abs_time: absolute timeout 2145 * @abs_time: absolute timeout
2122 * @bitset: 32 bit wakeup bitset set by userspace, defaults to all. 2146 * @bitset: 32 bit wakeup bitset set by userspace, defaults to all
2123 * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) 2147 * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
2124 * @uaddr2: the pi futex we will take prior to returning to user-space 2148 * @uaddr2: the pi futex we will take prior to returning to user-space
2125 * 2149 *
@@ -2246,7 +2270,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2246 res = fixup_owner(uaddr2, fshared, &q, !ret); 2270 res = fixup_owner(uaddr2, fshared, &q, !ret);
2247 /* 2271 /*
2248 * If fixup_owner() returned an error, propagate that. If it 2272 * If fixup_owner() returned an error, propagate that. If it
2249 * acquired the lock, clear our -ETIMEDOUT or -EINTR. 2273 * acquired the lock, clear -ETIMEDOUT or -EINTR.
2250 */ 2274 */
2251 if (res) 2275 if (res)
2252 ret = (res < 0) ? res : 0; 2276 ret = (res < 0) ? res : 0;
@@ -2302,9 +2326,9 @@ out:
2302 */ 2326 */
2303 2327
2304/** 2328/**
2305 * sys_set_robust_list - set the robust-futex list head of a task 2329 * sys_set_robust_list() - Set the robust-futex list head of a task
2306 * @head: pointer to the list-head 2330 * @head: pointer to the list-head
2307 * @len: length of the list-head, as userspace expects 2331 * @len: length of the list-head, as userspace expects
2308 */ 2332 */
2309SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, 2333SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2310 size_t, len) 2334 size_t, len)
@@ -2323,10 +2347,10 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
2323} 2347}
2324 2348
2325/** 2349/**
2326 * sys_get_robust_list - get the robust-futex list head of a task 2350 * sys_get_robust_list() - Get the robust-futex list head of a task
2327 * @pid: pid of the process [zero for current task] 2351 * @pid: pid of the process [zero for current task]
2328 * @head_ptr: pointer to a list-head pointer, the kernel fills it in 2352 * @head_ptr: pointer to a list-head pointer, the kernel fills it in
2329 * @len_ptr: pointer to a length field, the kernel fills in the header size 2353 * @len_ptr: pointer to a length field, the kernel fills in the header size
2330 */ 2354 */
2331SYSCALL_DEFINE3(get_robust_list, int, pid, 2355SYSCALL_DEFINE3(get_robust_list, int, pid,
2332 struct robust_list_head __user * __user *, head_ptr, 2356 struct robust_list_head __user * __user *, head_ptr,