path: root/kernel/futex.c
Diffstat (limited to 'kernel/futex.c')
-rw-r--r--  kernel/futex.c  220
1 file changed, 125 insertions(+), 95 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 248dd119a86e..8e3c3ffe1b9a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -89,36 +89,36 @@ struct futex_pi_state {
         union futex_key key;
 };
 
-/*
- * We use this hashed waitqueue instead of a normal wait_queue_t, so
+/**
+ * struct futex_q - The hashed futex queue entry, one per waiting task
+ * @task: the task waiting on the futex
+ * @lock_ptr: the hash bucket lock
+ * @key: the key the futex is hashed on
+ * @pi_state: optional priority inheritance state
+ * @rt_waiter: rt_waiter storage for use with requeue_pi
+ * @requeue_pi_key: the requeue_pi target futex key
+ * @bitset: bitset for the optional bitmasked wakeup
+ *
+ * We use this hashed waitqueue, instead of a normal wait_queue_t, so
  * we can wake only the relevant ones (hashed queues may be shared).
  *
  * A futex_q has a woken state, just like tasks have TASK_RUNNING.
  * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
  * The order of wakeup is always to make the first condition true, then
- * wake up q->waiter, then make the second condition true.
+ * the second.
+ *
+ * PI futexes are typically woken before they are removed from the hash list via
+ * the rt_mutex code. See unqueue_me_pi().
  */
 struct futex_q {
         struct plist_node list;
-        /* Waiter reference */
-        struct task_struct *task;
 
-        /* Which hash list lock to use: */
+        struct task_struct *task;
         spinlock_t *lock_ptr;
-
-        /* Key which the futex is hashed on: */
         union futex_key key;
-
-        /* Optional priority inheritance state: */
         struct futex_pi_state *pi_state;
-
-        /* rt_waiter storage for requeue_pi: */
         struct rt_mutex_waiter *rt_waiter;
-
-        /* The expected requeue pi target futex key: */
         union futex_key *requeue_pi_key;
-
-        /* Bitset for the optional bitmasked wakeup */
         u32 bitset;
 };
 
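
The "woken" predicate in the new kernel-doc above is subtle, so here is a minimal user-space sketch of the same invariant. The names (waiter_t, is_woken, wake_waiter) are illustrative, not from futex.c: the waker always makes the first condition true before the second, so a waiter that observes either condition can trust it has been (or is being) woken.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    /* Illustrative stand-in for struct futex_q's woken state. */
    typedef struct waiter {
            _Atomic bool on_list;      /* mirrors !plist_node_empty(&q->list) */
            _Atomic(void *) lock_ptr;  /* mirrors q->lock_ptr */
    } waiter_t;

    /* Mirrors: plist_node_empty(&q->list) || q->lock_ptr == 0 */
    static bool is_woken(waiter_t *w)
    {
            return !atomic_load(&w->on_list) || atomic_load(&w->lock_ptr) == NULL;
    }

    /* The wake side satisfies the first condition, then the second. */
    static void wake_waiter(waiter_t *w)
    {
            atomic_store(&w->on_list, false);  /* unlink from the hash list */
            atomic_store(&w->lock_ptr, NULL);  /* then clear the lock pointer */
    }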
@@ -150,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key)
  */
 static inline int match_futex(union futex_key *key1, union futex_key *key2)
 {
-        return (key1->both.word == key2->both.word
+        return (key1 && key2
+                && key1->both.word == key2->both.word
                 && key1->both.ptr == key2->both.ptr
                 && key1->both.offset == key2->both.offset);
 }
@@ -198,11 +199,12 @@ static void drop_futex_key_refs(union futex_key *key)
 }
 
 /**
- * get_futex_key - Get parameters which are the keys for a futex.
+ * get_futex_key() - Get parameters which are the keys for a futex
  * @uaddr: virtual address of the futex
  * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
  * @key: address where result is stored.
- * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE)
+ * @rw: mapping needs to be read/write (values: VERIFY_READ,
+ *      VERIFY_WRITE)
  *
  * Returns a negative error code or 0
  * The key words are stored in *key on success.
@@ -288,8 +290,8 @@ void put_futex_key(int fshared, union futex_key *key)
         drop_futex_key_refs(key);
 }
 
-/*
- * fault_in_user_writeable - fault in user address and verify RW access
+/**
+ * fault_in_user_writeable() - Fault in user address and verify RW access
  * @uaddr: pointer to faulting user space address
  *
  * Slow path to fixup the fault we just took in the atomic write
@@ -302,15 +304,21 @@ void put_futex_key(int fshared, union futex_key *key)
  */
 static int fault_in_user_writeable(u32 __user *uaddr)
 {
-        int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
-                                 1, 1, 0, NULL, NULL);
+        struct mm_struct *mm = current->mm;
+        int ret;
+
+        down_read(&mm->mmap_sem);
+        ret = get_user_pages(current, mm, (unsigned long)uaddr,
+                             1, 1, 0, NULL, NULL);
+        up_read(&mm->mmap_sem);
+
         return ret < 0 ? ret : 0;
 }
 
 /**
  * futex_top_waiter() - Return the highest priority waiter on a futex
  * @hb: the hash bucket the futex_q's reside in
  * @key: the futex key (to distinguish it from other futex futex_q's)
  *
  * Must be called with the hb lock held.
  */
@@ -395,9 +403,9 @@ static void free_pi_state(struct futex_pi_state *pi_state)
          * and has cleaned up the pi_state already
          */
         if (pi_state->owner) {
-                spin_lock_irq(&pi_state->owner->pi_lock);
+                raw_spin_lock_irq(&pi_state->owner->pi_lock);
                 list_del_init(&pi_state->list);
-                spin_unlock_irq(&pi_state->owner->pi_lock);
+                raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 
                 rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
         }
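
The spin_lock_irq() to raw_spin_lock_irq() conversions in this hunk (and in several hunks below) move pi_lock and the rt_mutex wait_lock to the raw lock API. The usual motivation, stated here as an assumption since this page carries no changelog, is PREEMPT_RT: plain spinlocks can become sleeping locks there, while raw_spinlock_t always spins with preemption disabled. A minimal kernel-context sketch of the API (demo_lock/demo_count are made-up names):

    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(demo_lock);
    static int demo_count;

    static void demo_bump(void)
    {
            /* Spins even on PREEMPT_RT; never sleeps, so it is safe in
             * contexts that must not schedule. */
            raw_spin_lock_irq(&demo_lock);
            demo_count++;
            raw_spin_unlock_irq(&demo_lock);
    }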
@@ -462,18 +470,18 @@ void exit_pi_state_list(struct task_struct *curr)
          * pi_state_list anymore, but we have to be careful
          * versus waiters unqueueing themselves:
          */
-        spin_lock_irq(&curr->pi_lock);
+        raw_spin_lock_irq(&curr->pi_lock);
         while (!list_empty(head)) {
 
                 next = head->next;
                 pi_state = list_entry(next, struct futex_pi_state, list);
                 key = pi_state->key;
                 hb = hash_futex(&key);
-                spin_unlock_irq(&curr->pi_lock);
+                raw_spin_unlock_irq(&curr->pi_lock);
 
                 spin_lock(&hb->lock);
 
-                spin_lock_irq(&curr->pi_lock);
+                raw_spin_lock_irq(&curr->pi_lock);
                 /*
                  * We dropped the pi-lock, so re-check whether this
                  * task still owns the PI-state:
@@ -487,15 +495,15 @@ void exit_pi_state_list(struct task_struct *curr)
                 WARN_ON(list_empty(&pi_state->list));
                 list_del_init(&pi_state->list);
                 pi_state->owner = NULL;
-                spin_unlock_irq(&curr->pi_lock);
+                raw_spin_unlock_irq(&curr->pi_lock);
 
                 rt_mutex_unlock(&pi_state->pi_mutex);
 
                 spin_unlock(&hb->lock);
 
-                spin_lock_irq(&curr->pi_lock);
+                raw_spin_lock_irq(&curr->pi_lock);
         }
-        spin_unlock_irq(&curr->pi_lock);
+        raw_spin_unlock_irq(&curr->pi_lock);
 }
 
 static int
@@ -550,7 +558,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
          * change of the task flags, we do this protected by
          * p->pi_lock:
          */
-        spin_lock_irq(&p->pi_lock);
+        raw_spin_lock_irq(&p->pi_lock);
         if (unlikely(p->flags & PF_EXITING)) {
                 /*
                  * The task is on the way out. When PF_EXITPIDONE is
@@ -559,7 +567,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
                  */
                 int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
 
-                spin_unlock_irq(&p->pi_lock);
+                raw_spin_unlock_irq(&p->pi_lock);
                 put_task_struct(p);
                 return ret;
         }
@@ -578,7 +586,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
         WARN_ON(!list_empty(&pi_state->list));
         list_add(&pi_state->list, &p->pi_state_list);
         pi_state->owner = p;
-        spin_unlock_irq(&p->pi_lock);
+        raw_spin_unlock_irq(&p->pi_lock);
 
         put_task_struct(p);
 
@@ -588,7 +596,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 }
 
 /**
- * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex
+ * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
  * @uaddr: the pi futex user address
  * @hb: the pi futex hash bucket
  * @key: the futex key associated with uaddr and hb
@@ -752,7 +760,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
         if (!pi_state)
                 return -EINVAL;
 
-        spin_lock(&pi_state->pi_mutex.wait_lock);
+        raw_spin_lock(&pi_state->pi_mutex.wait_lock);
         new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
         /*
@@ -781,23 +789,23 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
                 else if (curval != uval)
                         ret = -EINVAL;
                 if (ret) {
-                        spin_unlock(&pi_state->pi_mutex.wait_lock);
+                        raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
                         return ret;
                 }
         }
 
-        spin_lock_irq(&pi_state->owner->pi_lock);
+        raw_spin_lock_irq(&pi_state->owner->pi_lock);
         WARN_ON(list_empty(&pi_state->list));
         list_del_init(&pi_state->list);
-        spin_unlock_irq(&pi_state->owner->pi_lock);
+        raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 
-        spin_lock_irq(&new_owner->pi_lock);
+        raw_spin_lock_irq(&new_owner->pi_lock);
         WARN_ON(!list_empty(&pi_state->list));
         list_add(&pi_state->list, &new_owner->pi_state_list);
         pi_state->owner = new_owner;
-        spin_unlock_irq(&new_owner->pi_lock);
+        raw_spin_unlock_irq(&new_owner->pi_lock);
 
-        spin_unlock(&pi_state->pi_mutex.wait_lock);
+        raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
         rt_mutex_unlock(&pi_state->pi_mutex);
 
         return 0;
@@ -915,8 +923,8 @@ retry:
         hb1 = hash_futex(&key1);
         hb2 = hash_futex(&key2);
 
-        double_lock_hb(hb1, hb2);
 retry_private:
+        double_lock_hb(hb1, hb2);
         op_ret = futex_atomic_op_inuser(op, uaddr2);
         if (unlikely(op_ret < 0)) {
 
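
Moving double_lock_hb() below the retry_private: label means the hash-bucket locks are reacquired on every retry; the fault path drops them before touching user memory, so taking them at the label keeps lock state consistent across retries. For context, the lock-ordering helper itself is not part of this diff; the following is a sketch reconstructed from the same era's futex.c, locking the lower-addressed bucket first to avoid ABBA deadlocks:

    static inline void double_lock_hb(struct futex_hash_bucket *hb1,
                                      struct futex_hash_bucket *hb2)
    {
            /* Always take the lower-addressed bucket first so two callers
             * locking the same pair can never deadlock ABBA-style. */
            if (hb1 <= hb2) {
                    spin_lock(&hb1->lock);
                    if (hb1 < hb2)
                            spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
            } else {
                    spin_lock(&hb2->lock);
                    spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
            }
    }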
@@ -1002,7 +1010,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
                 plist_add(&q->list, &hb2->chain);
                 q->lock_ptr = &hb2->lock;
 #ifdef CONFIG_DEBUG_PI_LIST
-                q->list.plist.lock = &hb2->lock;
+                q->list.plist.spinlock = &hb2->lock;
 #endif
         }
         get_futex_key_refs(key2);
@@ -1011,9 +1019,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 
 /**
  * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
- * q: the futex_q
- * key: the key of the requeue target futex
- * hb: the hash_bucket of the requeue target futex
+ * @q: the futex_q
+ * @key: the key of the requeue target futex
+ * @hb: the hash_bucket of the requeue target futex
  *
  * During futex_requeue, with requeue_pi=1, it is possible to acquire the
  * target futex if it is uncontended or via a lock steal. Set the futex_q key
@@ -1027,7 +1035,6 @@ static inline
 void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
                            struct futex_hash_bucket *hb)
 {
-        drop_futex_key_refs(&q->key);
         get_futex_key_refs(key);
         q->key = *key;
 
@@ -1039,7 +1046,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 
         q->lock_ptr = &hb->lock;
 #ifdef CONFIG_DEBUG_PI_LIST
-        q->list.plist.lock = &hb->lock;
+        q->list.plist.spinlock = &hb->lock;
 #endif
 
         wake_up_state(q->task, TASK_NORMAL);
@@ -1225,6 +1232,7 @@ retry_private:
                  */
                 if (ret == 1) {
                         WARN_ON(pi_state);
+                        drop_count++;
                         task_count++;
                         ret = get_futex_value_locked(&curval2, uaddr2);
                         if (!ret)
@@ -1303,6 +1311,7 @@ retry_private:
                         if (ret == 1) {
                                 /* We got the lock. */
                                 requeue_pi_wake_futex(this, &key2, hb2);
+                                drop_count++;
                                 continue;
                         } else if (ret) {
                                 /* -EDEADLK */
@@ -1350,6 +1359,25 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
         return hb;
 }
 
+static inline void
+queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
+{
+        spin_unlock(&hb->lock);
+        drop_futex_key_refs(&q->key);
+}
+
+/**
+ * queue_me() - Enqueue the futex_q on the futex_hash_bucket
+ * @q: The futex_q to enqueue
+ * @hb: The destination hash bucket
+ *
+ * The hb->lock must be held by the caller, and is released here. A call to
+ * queue_me() is typically paired with exactly one call to unqueue_me(). The
+ * exceptions involve the PI related operations, which may use unqueue_me_pi()
+ * or nothing if the unqueue is done as part of the wake process and the unqueue
+ * state is implicit in the state of the woken task (see futex_wait_requeue_pi()
+ * for an example).
+ */
 static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 {
         int prio;
@@ -1366,26 +1394,24 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 
         plist_node_init(&q->list, prio);
 #ifdef CONFIG_DEBUG_PI_LIST
-        q->list.plist.lock = &hb->lock;
+        q->list.plist.spinlock = &hb->lock;
 #endif
         plist_add(&q->list, &hb->chain);
         q->task = current;
         spin_unlock(&hb->lock);
 }
 
-static inline void
-queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
-{
-        spin_unlock(&hb->lock);
-        drop_futex_key_refs(&q->key);
-}
-
-/*
- * queue_me and unqueue_me must be called as a pair, each
- * exactly once. They are called with the hashed spinlock held.
+/**
+ * unqueue_me() - Remove the futex_q from its futex_hash_bucket
+ * @q: The futex_q to unqueue
+ *
+ * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
+ * be paired with exactly one earlier call to queue_me().
+ *
+ * Returns:
+ *   1 - if the futex_q was still queued (and we unqueued it)
+ *   0 - if the futex_q was already removed by the waking thread
  */
-
-/* Return 1 if we were still queued (ie. 0 means we were woken) */
 static int unqueue_me(struct futex_q *q)
 {
         spinlock_t *lock_ptr;
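
From a caller's perspective, the pairing contract spelled out in the two new kernel-doc blocks looks roughly like this. A hedged sketch only: wait_sketch is a made-up name and error handling is elided; the real caller is futex_wait() via futex_wait_queue_me() below.

    static int wait_sketch(struct futex_q *q, struct futex_hash_bucket *hb)
    {
            /* hb->lock is held on entry (taken by queue_lock()). */
            set_current_state(TASK_INTERRUPTIBLE);
            queue_me(q, hb);                 /* enqueue and release hb->lock */

            schedule();                      /* sleep until woken or signalled */
            __set_current_state(TASK_RUNNING);

            if (!unqueue_me(q))              /* 0: futex_wake() already unqueued us */
                    return 0;
            return -EINTR;                   /* 1: still queued, not a futex wakeup */
    }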
@@ -1503,18 +1529,18 @@ retry:
          * itself.
          */
         if (pi_state->owner != NULL) {
-                spin_lock_irq(&pi_state->owner->pi_lock);
+                raw_spin_lock_irq(&pi_state->owner->pi_lock);
                 WARN_ON(list_empty(&pi_state->list));
                 list_del_init(&pi_state->list);
-                spin_unlock_irq(&pi_state->owner->pi_lock);
+                raw_spin_unlock_irq(&pi_state->owner->pi_lock);
         }
 
         pi_state->owner = newowner;
 
-        spin_lock_irq(&newowner->pi_lock);
+        raw_spin_lock_irq(&newowner->pi_lock);
         WARN_ON(!list_empty(&pi_state->list));
         list_add(&pi_state->list, &newowner->pi_state_list);
-        spin_unlock_irq(&newowner->pi_lock);
+        raw_spin_unlock_irq(&newowner->pi_lock);
         return 0;
 
         /*
@@ -1638,17 +1664,14 @@ out:
 static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
                                 struct hrtimer_sleeper *timeout)
 {
-        queue_me(q, hb);
-
         /*
-         * There might have been scheduling since the queue_me(), as we
-         * cannot hold a spinlock across the get_user() in case it
-         * faults, and we cannot just set TASK_INTERRUPTIBLE state when
-         * queueing ourselves into the futex hash. This code thus has to
-         * rely on the futex_wake() code removing us from hash when it
-         * wakes us up.
+         * The task state is guaranteed to be set before another task can
+         * wake it. set_current_state() is implemented using set_mb() and
+         * queue_me() calls spin_unlock() upon completion, both serializing
+         * access to the hash list and forcing another memory barrier.
          */
         set_current_state(TASK_INTERRUPTIBLE);
+        queue_me(q, hb);
 
         /* Arm the timer */
         if (timeout) {
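
The reordering above makes the waiter's state visible before the waiter itself is published on the hash list. A schematic of why that suffices, written as a comment block rather than new code:

    /*
     * Ordering the hunk above relies on:
     *
     *   set_current_state(TASK_INTERRUPTIBLE);  // set_mb(): the state change
     *                                           // is globally visible first
     *   queue_me(q, hb);                        // plist_add() + spin_unlock();
     *                                           // the unlock is a release barrier
     *
     * Any waker that finds this futex_q on the hash list is therefore
     * guaranteed to also observe TASK_INTERRUPTIBLE, so its
     * wake_up_state() cannot be lost.
     */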
@@ -1658,8 +1681,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
         }
 
         /*
-         * !plist_node_empty() is safe here without any lock.
-         * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
+         * If we have been removed from the hash list, then another task
+         * has tried to wake us, and we can skip the call to schedule().
          */
         if (likely(!plist_node_empty(&q->list))) {
                 /*
@@ -1776,6 +1799,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
                                              current->timer_slack_ns);
         }
 
+retry:
         /* Prepare to wait on uaddr. */
         ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
         if (ret)
@@ -1793,9 +1817,14 @@ static int futex_wait(u32 __user *uaddr, int fshared,
                 goto out_put_key;
 
         /*
-         * We expect signal_pending(current), but another thread may
-         * have handled it for us already.
+         * We expect signal_pending(current), but we might be the
+         * victim of a spurious wakeup as well.
          */
+        if (!signal_pending(current)) {
+                put_futex_key(fshared, &q.key);
+                goto retry;
+        }
+
         ret = -ERESTARTSYS;
         if (!abs_time)
                 goto out_put_key;
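
This retry loop makes the kernel swallow wakeups that arrive with no signal pending instead of leaking them to user space. The user-space contract is the mirror image: FUTEX_WAIT callers must re-test the futex word in a loop, because wakeups may still be spurious. A minimal, hedged user-space sketch (futex_wait_word is a made-up helper):

    #include <errno.h>
    #include <linux/futex.h>
    #include <stdatomic.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Block until *uaddr no longer holds 'expected'. */
    static void futex_wait_word(_Atomic int *uaddr, int expected)
    {
            while (atomic_load(uaddr) == expected) {
                    long rc = syscall(SYS_futex, uaddr, FUTEX_WAIT, expected,
                                      NULL, NULL, 0);
                    /* EAGAIN: value changed before we slept; EINTR: signal.
                     * Either way the loop re-tests the futex word. */
                    if (rc == -1 && errno != EAGAIN && errno != EINTR)
                            break;  /* unexpected error */
            }
    }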
@@ -2102,11 +2131,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
                  * Unqueue the futex_q and determine which it was.
                  */
                 plist_del(&q->list, &q->list.plist);
-                drop_futex_key_refs(&q->key);
 
+                /* Handle spurious wakeups gracefully */
+                ret = -EWOULDBLOCK;
                 if (timeout && !timeout->task)
                         ret = -ETIMEDOUT;
-                else
+                else if (signal_pending(current))
                         ret = -ERESTARTNOINTR;
         }
         return ret;
@@ -2114,12 +2144,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 
 /**
  * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
- * @uaddr: the futex we initialyl wait on (non-pi)
+ * @uaddr: the futex we initially wait on (non-pi)
  * @fshared: whether the futexes are shared (1) or not (0). They must be
  *           the same type, no requeueing from private to shared, etc.
  * @val: the expected value of uaddr
  * @abs_time: absolute timeout
- * @bitset: 32 bit wakeup bitset set by userspace, defaults to all.
+ * @bitset: 32 bit wakeup bitset set by userspace, defaults to all
  * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
  * @uaddr2: the pi futex we will take prior to returning to user-space
  *
@@ -2246,7 +2276,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
                 res = fixup_owner(uaddr2, fshared, &q, !ret);
                 /*
                  * If fixup_owner() returned an error, propagate that. If it
-                 * acquired the lock, clear our -ETIMEDOUT or -EINTR.
+                 * acquired the lock, clear -ETIMEDOUT or -EINTR.
                  */
                 if (res)
                         ret = (res < 0) ? res : 0;
@@ -2302,9 +2332,9 @@ out:
  */
 
 /**
- * sys_set_robust_list - set the robust-futex list head of a task
+ * sys_set_robust_list() - Set the robust-futex list head of a task
  * @head: pointer to the list-head
  * @len: length of the list-head, as userspace expects
  */
 SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
                 size_t, len)
@@ -2323,10 +2353,10 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
 }
 
 /**
- * sys_get_robust_list - get the robust-futex list head of a task
+ * sys_get_robust_list() - Get the robust-futex list head of a task
  * @pid: pid of the process [zero for current task]
  * @head_ptr: pointer to a list-head pointer, the kernel fills it in
  * @len_ptr: pointer to a length field, the kernel fills in the header size
  */
 SYSCALL_DEFINE3(get_robust_list, int, pid,
                 struct robust_list_head __user * __user *, head_ptr,