Diffstat (limited to 'kernel/futex.c')
-rw-r--r--    kernel/futex.c    220
1 file changed, 125 insertions(+), 95 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 248dd119a86e..8e3c3ffe1b9a 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -89,36 +89,36 @@ struct futex_pi_state {
 	union futex_key key;
 };
 
-/*
- * We use this hashed waitqueue instead of a normal wait_queue_t, so
+/**
+ * struct futex_q - The hashed futex queue entry, one per waiting task
+ * @task:		the task waiting on the futex
+ * @lock_ptr:		the hash bucket lock
+ * @key:		the key the futex is hashed on
+ * @pi_state:		optional priority inheritance state
+ * @rt_waiter:		rt_waiter storage for use with requeue_pi
+ * @requeue_pi_key:	the requeue_pi target futex key
+ * @bitset:		bitset for the optional bitmasked wakeup
+ *
+ * We use this hashed waitqueue, instead of a normal wait_queue_t, so
  * we can wake only the relevant ones (hashed queues may be shared).
  *
  * A futex_q has a woken state, just like tasks have TASK_RUNNING.
  * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
  * The order of wakeup is always to make the first condition true, then
- * wake up q->waiter, then make the second condition true.
+ * the second.
+ *
+ * PI futexes are typically woken before they are removed from the hash list via
+ * the rt_mutex code. See unqueue_me_pi().
  */
 struct futex_q {
 	struct plist_node list;
-	/* Waiter reference */
-	struct task_struct *task;
 
-	/* Which hash list lock to use: */
+	struct task_struct *task;
 	spinlock_t *lock_ptr;
-
-	/* Key which the futex is hashed on: */
 	union futex_key key;
-
-	/* Optional priority inheritance state: */
 	struct futex_pi_state *pi_state;
-
-	/* rt_waiter storage for requeue_pi: */
 	struct rt_mutex_waiter *rt_waiter;
-
-	/* The expected requeue pi target futex key: */
 	union futex_key *requeue_pi_key;
-
-	/* Bitset for the optional bitmasked wakeup */
 	u32 bitset;
 };
 
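The woken test described above can be read as a single predicate. A minimal sketch, assuming only the struct futex_q fields from this hunk; note the kernel open-codes this check (in unqueue_me() and friends) rather than providing a helper, so futex_q_woken() here is hypothetical:

/* Illustrative only: a futex_q counts as woken once it is off the
 * hash list OR its lock_ptr has been cleared by the waker. */
static inline int futex_q_woken(struct futex_q *q)
{
        return plist_node_empty(&q->list) || q->lock_ptr == NULL;
}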
@@ -150,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key)
  */
 static inline int match_futex(union futex_key *key1, union futex_key *key2)
 {
-	return (key1->both.word == key2->both.word
+	return (key1 && key2
+		&& key1->both.word == key2->both.word
 		&& key1->both.ptr == key2->both.ptr
 		&& key1->both.offset == key2->both.offset);
 }
@@ -198,11 +199,12 @@ static void drop_futex_key_refs(union futex_key *key)
 }
 
 /**
- * get_futex_key - Get parameters which are the keys for a futex.
+ * get_futex_key() - Get parameters which are the keys for a futex
  * @uaddr:	virtual address of the futex
  * @fshared:	0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
  * @key:	address where result is stored.
- * @rw:		mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE)
+ * @rw:		mapping needs to be read/write (values: VERIFY_READ,
+ *		VERIFY_WRITE)
  *
  * Returns a negative error code or 0
  * The key words are stored in *key on success.
@@ -288,8 +290,8 @@ void put_futex_key(int fshared, union futex_key *key)
 	drop_futex_key_refs(key);
 }
 
-/*
- * fault_in_user_writeable - fault in user address and verify RW access
+/**
+ * fault_in_user_writeable() - Fault in user address and verify RW access
  * @uaddr:	pointer to faulting user space address
  *
  * Slow path to fixup the fault we just took in the atomic write
@@ -302,15 +304,21 @@ void put_futex_key(int fshared, union futex_key *key)
  */
 static int fault_in_user_writeable(u32 __user *uaddr)
 {
-	int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
-				 1, 1, 0, NULL, NULL);
+	struct mm_struct *mm = current->mm;
+	int ret;
+
+	down_read(&mm->mmap_sem);
+	ret = get_user_pages(current, mm, (unsigned long)uaddr,
+			     1, 1, 0, NULL, NULL);
+	up_read(&mm->mmap_sem);
+
 	return ret < 0 ? ret : 0;
 }
 
 /**
  * futex_top_waiter() - Return the highest priority waiter on a futex
  * @hb:		the hash bucket the futex_q's reside in
  * @key:	the futex key (to distinguish it from other futex futex_q's)
  *
  * Must be called with the hb lock held.
  */
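For context, this helper is the slow-path fixup used when an atomic futex operation faults; the fix above follows the calling convention of get_user_pages(), which must run with mmap_sem held for read. A condensed sketch of a typical call site, based on the fault path in futex_wake_op() below (error handling trimmed, so treat it as an outline rather than the exact code):

        op_ret = futex_atomic_op_inuser(op, uaddr2);
        if (unlikely(op_ret < 0)) {
                /* Cannot handle a fault with the hash bucket locks held. */
                double_unlock_hb(hb1, hb2);

                if (unlikely(op_ret != -EFAULT)) {
                        ret = op_ret;
                        goto out_put_keys;
                }

                /* Fault the page in writably, then retry the atomic op. */
                ret = fault_in_user_writeable(uaddr2);
                if (ret)
                        goto out_put_keys;

                goto retry_private;
        }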
@@ -395,9 +403,9 @@ static void free_pi_state(struct futex_pi_state *pi_state)
 	 * and has cleaned up the pi_state already
 	 */
 	if (pi_state->owner) {
-		spin_lock_irq(&pi_state->owner->pi_lock);
+		raw_spin_lock_irq(&pi_state->owner->pi_lock);
 		list_del_init(&pi_state->list);
-		spin_unlock_irq(&pi_state->owner->pi_lock);
+		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 
 		rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
 	}
@@ -462,18 +470,18 @@ void exit_pi_state_list(struct task_struct *curr)
 	 * pi_state_list anymore, but we have to be careful
 	 * versus waiters unqueueing themselves:
 	 */
-	spin_lock_irq(&curr->pi_lock);
+	raw_spin_lock_irq(&curr->pi_lock);
 	while (!list_empty(head)) {
 
 		next = head->next;
 		pi_state = list_entry(next, struct futex_pi_state, list);
 		key = pi_state->key;
 		hb = hash_futex(&key);
-		spin_unlock_irq(&curr->pi_lock);
+		raw_spin_unlock_irq(&curr->pi_lock);
 
 		spin_lock(&hb->lock);
 
-		spin_lock_irq(&curr->pi_lock);
+		raw_spin_lock_irq(&curr->pi_lock);
 		/*
 		 * We dropped the pi-lock, so re-check whether this
 		 * task still owns the PI-state:
@@ -487,15 +495,15 @@ void exit_pi_state_list(struct task_struct *curr)
 		WARN_ON(list_empty(&pi_state->list));
 		list_del_init(&pi_state->list);
 		pi_state->owner = NULL;
-		spin_unlock_irq(&curr->pi_lock);
+		raw_spin_unlock_irq(&curr->pi_lock);
 
 		rt_mutex_unlock(&pi_state->pi_mutex);
 
 		spin_unlock(&hb->lock);
 
-		spin_lock_irq(&curr->pi_lock);
+		raw_spin_lock_irq(&curr->pi_lock);
 	}
-	spin_unlock_irq(&curr->pi_lock);
+	raw_spin_unlock_irq(&curr->pi_lock);
 }
 
 static int
@@ -550,7 +558,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	 * change of the task flags, we do this protected by
 	 * p->pi_lock:
 	 */
-	spin_lock_irq(&p->pi_lock);
+	raw_spin_lock_irq(&p->pi_lock);
 	if (unlikely(p->flags & PF_EXITING)) {
 		/*
 		 * The task is on the way out. When PF_EXITPIDONE is
@@ -559,7 +567,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 		 */
 		int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
 
-		spin_unlock_irq(&p->pi_lock);
+		raw_spin_unlock_irq(&p->pi_lock);
 		put_task_struct(p);
 		return ret;
 	}
@@ -578,7 +586,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
-	spin_unlock_irq(&p->pi_lock);
+	raw_spin_unlock_irq(&p->pi_lock);
 
 	put_task_struct(p);
 
@@ -588,7 +596,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 }
 
 /**
- * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex
+ * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
  * @uaddr:	the pi futex user address
  * @hb:		the pi futex hash bucket
  * @key:	the futex key associated with uaddr and hb
@@ -752,7 +760,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	if (!pi_state)
 		return -EINVAL;
 
-	spin_lock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_lock(&pi_state->pi_mutex.wait_lock);
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
@@ -781,23 +789,23 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 		else if (curval != uval)
 			ret = -EINVAL;
 		if (ret) {
-			spin_unlock(&pi_state->pi_mutex.wait_lock);
+			raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 			return ret;
 		}
 	}
 
-	spin_lock_irq(&pi_state->owner->pi_lock);
+	raw_spin_lock_irq(&pi_state->owner->pi_lock);
 	WARN_ON(list_empty(&pi_state->list));
 	list_del_init(&pi_state->list);
-	spin_unlock_irq(&pi_state->owner->pi_lock);
+	raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 
-	spin_lock_irq(&new_owner->pi_lock);
+	raw_spin_lock_irq(&new_owner->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &new_owner->pi_state_list);
 	pi_state->owner = new_owner;
-	spin_unlock_irq(&new_owner->pi_lock);
+	raw_spin_unlock_irq(&new_owner->pi_lock);
 
-	spin_unlock(&pi_state->pi_mutex.wait_lock);
+	raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 	rt_mutex_unlock(&pi_state->pi_mutex);
 
 	return 0;
@@ -915,8 +923,8 @@ retry:
 	hb1 = hash_futex(&key1);
 	hb2 = hash_futex(&key2);
 
-	double_lock_hb(hb1, hb2);
 retry_private:
+	double_lock_hb(hb1, hb2);
 	op_ret = futex_atomic_op_inuser(op, uaddr2);
 	if (unlikely(op_ret < 0)) {
 
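With double_lock_hb() now under the retry_private: label, a retry after a fault retakes the bucket locks that the fault path dropped. For reference, a hedged sketch of how userspace typically drives this operation; FUTEX_WAKE_OP packs an operation and comparison on uaddr2 into the last argument via the FUTEX_OP() macro from linux/futex.h:

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Wake one waiter on uaddr1; additionally set *uaddr2 = 0 and, if its old
 * value was nonzero, wake one waiter on uaddr2 as well. The fourth syscall
 * slot (normally the timeout) carries nr_wake2 as an integer. */
static long wake_op(int *uaddr1, int *uaddr2)
{
        return syscall(SYS_futex, uaddr1, FUTEX_WAKE_OP, 1,
                       (void *)1 /* nr_wake2 */, uaddr2,
                       FUTEX_OP(FUTEX_OP_SET, 0, FUTEX_OP_CMP_NE, 0));
}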
@@ -1002,7 +1010,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 		plist_add(&q->list, &hb2->chain);
 		q->lock_ptr = &hb2->lock;
 #ifdef CONFIG_DEBUG_PI_LIST
-		q->list.plist.lock = &hb2->lock;
+		q->list.plist.spinlock = &hb2->lock;
 #endif
 	}
 	get_futex_key_refs(key2);
@@ -1011,9 +1019,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 
 /**
  * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
- * q:	the futex_q
- * key:	the key of the requeue target futex
- * hb:	the hash_bucket of the requeue target futex
+ * @q:		the futex_q
+ * @key:	the key of the requeue target futex
+ * @hb:		the hash_bucket of the requeue target futex
  *
  * During futex_requeue, with requeue_pi=1, it is possible to acquire the
  * target futex if it is uncontended or via a lock steal. Set the futex_q key
@@ -1027,7 +1035,6 @@ static inline
 void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 			   struct futex_hash_bucket *hb)
 {
-	drop_futex_key_refs(&q->key);
 	get_futex_key_refs(key);
 	q->key = *key;
 
@@ -1039,7 +1046,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 
 	q->lock_ptr = &hb->lock;
 #ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.lock = &hb->lock;
+	q->list.plist.spinlock = &hb->lock;
 #endif
 
 	wake_up_state(q->task, TASK_NORMAL);
@@ -1225,6 +1232,7 @@ retry_private:
 		 */
 		if (ret == 1) {
 			WARN_ON(pi_state);
+			drop_count++;
 			task_count++;
 			ret = get_futex_value_locked(&curval2, uaddr2);
 			if (!ret)
@@ -1303,6 +1311,7 @@ retry_private:
 			if (ret == 1) {
 				/* We got the lock. */
 				requeue_pi_wake_futex(this, &key2, hb2);
+				drop_count++;
 				continue;
 			} else if (ret) {
 				/* -EDEADLK */
@@ -1350,6 +1359,25 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
 	return hb;
 }
 
+static inline void
+queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
+{
+	spin_unlock(&hb->lock);
+	drop_futex_key_refs(&q->key);
+}
+
+/**
+ * queue_me() - Enqueue the futex_q on the futex_hash_bucket
+ * @q:	The futex_q to enqueue
+ * @hb:	The destination hash bucket
+ *
+ * The hb->lock must be held by the caller, and is released here. A call to
+ * queue_me() is typically paired with exactly one call to unqueue_me(). The
+ * exceptions involve the PI related operations, which may use unqueue_me_pi()
+ * or nothing if the unqueue is done as part of the wake process and the
+ * unqueue state is implicit in the state of the woken task (see
+ * futex_wait_requeue_pi() for an example).
+ */
 static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 {
 	int prio;
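A condensed sketch of the pairing the new kerneldoc describes, following the non-PI wait path in this file (futex_wait()/futex_wait_setup(); error paths trimmed, so read it as an outline rather than the exact code):

        /* Take the bucket lock and pin the key. */
        hb = queue_lock(&q);

        ret = get_futex_value_locked(&uval, uaddr);
        if (ret || uval != val) {
                /* Value changed: undo queue_lock() without ever queueing. */
                queue_unlock(&q, hb);
                return ret ? ret : -EWOULDBLOCK;
        }

        /* queue_me() enqueues q and drops hb->lock... */
        futex_wait_queue_me(hb, &q, to);

        /* ...and unqueue_me() is its exactly-once counterpart. */
        if (!unqueue_me(&q))
                return 0;       /* woken by futex_wake() */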
@@ -1366,26 +1394,24 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 
 	plist_node_init(&q->list, prio);
 #ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.lock = &hb->lock;
+	q->list.plist.spinlock = &hb->lock;
 #endif
 	plist_add(&q->list, &hb->chain);
 	q->task = current;
 	spin_unlock(&hb->lock);
 }
 
-static inline void
-queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
-{
-	spin_unlock(&hb->lock);
-	drop_futex_key_refs(&q->key);
-}
-
-/*
- * queue_me and unqueue_me must be called as a pair, each
- * exactly once. They are called with the hashed spinlock held.
- */
-
-/* Return 1 if we were still queued (ie. 0 means we were woken) */
+/**
+ * unqueue_me() - Remove the futex_q from its futex_hash_bucket
+ * @q:	The futex_q to unqueue
+ *
+ * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
+ * be paired with exactly one earlier call to queue_me().
+ *
+ * Returns:
+ *   1 - if the futex_q was still queued (and we unqueued it)
+ *   0 - if the futex_q was already removed by the waking thread
+ */
 static int unqueue_me(struct futex_q *q)
 {
 	spinlock_t *lock_ptr;
@@ -1503,18 +1529,18 @@ retry:
 	 * itself.
 	 */
 	if (pi_state->owner != NULL) {
-		spin_lock_irq(&pi_state->owner->pi_lock);
+		raw_spin_lock_irq(&pi_state->owner->pi_lock);
 		WARN_ON(list_empty(&pi_state->list));
 		list_del_init(&pi_state->list);
-		spin_unlock_irq(&pi_state->owner->pi_lock);
+		raw_spin_unlock_irq(&pi_state->owner->pi_lock);
 	}
 
 	pi_state->owner = newowner;
 
-	spin_lock_irq(&newowner->pi_lock);
+	raw_spin_lock_irq(&newowner->pi_lock);
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &newowner->pi_state_list);
-	spin_unlock_irq(&newowner->pi_lock);
+	raw_spin_unlock_irq(&newowner->pi_lock);
 	return 0;
 
 	/*
@@ -1638,17 +1664,14 @@ out:
 static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
 				struct hrtimer_sleeper *timeout)
 {
-	queue_me(q, hb);
-
 	/*
-	 * There might have been scheduling since the queue_me(), as we
-	 * cannot hold a spinlock across the get_user() in case it
-	 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
-	 * queueing ourselves into the futex hash. This code thus has to
-	 * rely on the futex_wake() code removing us from hash when it
-	 * wakes us up.
+	 * The task state is guaranteed to be set before another task can
+	 * wake it. set_current_state() is implemented using set_mb() and
+	 * queue_me() calls spin_unlock() upon completion, both serializing
+	 * access to the hash list and forcing another memory barrier.
 	 */
 	set_current_state(TASK_INTERRUPTIBLE);
+	queue_me(q, hb);
 
 	/* Arm the timer */
 	if (timeout) {
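The reordering is the classic lost-wakeup discipline: publish the sleeping state before publishing the queue entry, so the waker cannot dequeue and wake the task in the window between the two. An illustrative interleaving of the two sides (not code from the patch; the waker column paraphrases the wake_futex() steps):

        /*
         * waiter (this function)              waker (wake_futex())
         * -------------------------------     --------------------------------
         * set_current_state(TASK_INTERRUPTIBLE);
         *   (set_mb() -> full barrier)        spin_lock(&hb->lock);
         * queue_me(q, hb);                    plist_del(&q->list, ...);
         *   (spin_unlock -> barrier)          q->lock_ptr = NULL;
         * if (!plist_node_empty(&q->list))    wake_up_state(q->task, TASK_NORMAL);
         *         schedule();
         */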
@@ -1658,8 +1681,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
 	}
 
 	/*
-	 * !plist_node_empty() is safe here without any lock.
-	 * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
+	 * If we have been removed from the hash list, then another task
+	 * has tried to wake us, and we can skip the call to schedule().
 	 */
 	if (likely(!plist_node_empty(&q->list))) {
 		/*
@@ -1776,6 +1799,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 					     current->timer_slack_ns);
 	}
 
+retry:
 	/* Prepare to wait on uaddr. */
 	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
 	if (ret)
@@ -1793,9 +1817,14 @@ static int futex_wait(u32 __user *uaddr, int fshared,
 		goto out_put_key;
 
 	/*
-	 * We expect signal_pending(current), but another thread may
-	 * have handled it for us already.
+	 * We expect signal_pending(current), but we might be the
+	 * victim of a spurious wakeup as well.
 	 */
+	if (!signal_pending(current)) {
+		put_futex_key(fshared, &q.key);
+		goto retry;
+	}
+
 	ret = -ERESTARTSYS;
 	if (!abs_time)
 		goto out_put_key;
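The new retry absorbs wakeups that arrive with no signal pending and no futex_wake() (spurious wakeups) inside the kernel, but FUTEX_WAIT callers should still loop, since the syscall legitimately returns early on EINTR or when the futex word already changed (EAGAIN). A minimal runnable pattern, assuming a simple protocol where *futx becomes nonzero once the awaited event has fired:

#include <errno.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

static void wait_for_event(int *futx)
{
        /* Re-check the condition around every sleep. */
        while (__atomic_load_n(futx, __ATOMIC_ACQUIRE) == 0) {
                long rc = syscall(SYS_futex, futx, FUTEX_WAIT_PRIVATE,
                                  0, NULL, NULL, 0);
                if (rc == -1 && errno != EAGAIN && errno != EINTR)
                        break;  /* unexpected error */
        }
}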
@@ -2102,11 +2131,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 		 * Unqueue the futex_q and determine which it was.
 		 */
 		plist_del(&q->list, &q->list.plist);
-		drop_futex_key_refs(&q->key);
 
+		/* Handle spurious wakeups gracefully */
+		ret = -EWOULDBLOCK;
 		if (timeout && !timeout->task)
 			ret = -ETIMEDOUT;
-		else
+		else if (signal_pending(current))
 			ret = -ERESTARTNOINTR;
 	}
 	return ret;
@@ -2114,12 +2144,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 
 /**
  * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
- * @uaddr:	the futex we initialyl wait on (non-pi)
+ * @uaddr:	the futex we initially wait on (non-pi)
  * @fshared:	whether the futexes are shared (1) or not (0). They must be
  *		the same type, no requeueing from private to shared, etc.
  * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
- * @bitset:	32 bit wakeup bitset set by userspace, defaults to all.
+ * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
  * @clockrt:	whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
  * @uaddr2:	the pi futex we will take prior to returning to user-space
  *
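This operation exists so that PI-aware condition variables can requeue a waiter from the condvar futex directly onto the PI mutex futex. Roughly the call sequence a glibc-style pthread_cond implementation uses with a PRIO_INHERIT mutex; the identifiers below are placeholders and the surrounding sequence-counter protocol is omitted, so treat this as a hedged sketch rather than the exact ABI:

/* Waiter: atomically "unlock mutex, wait on cond, relock mutex via PI".
 * The kernel forces the bitset to FUTEX_BITSET_MATCH_ANY for this op. */
syscall(SYS_futex, &cond_futex, FUTEX_WAIT_REQUEUE_PI, seq,
        timeout, &mutex_futex, 0);

/* Signaler: wake (or requeue onto the PI mutex) exactly one waiter;
 * nr_wake must be 1, and a broadcast would pass a large nr_requeue. */
syscall(SYS_futex, &cond_futex, FUTEX_CMP_REQUEUE_PI, 1,
        (void *)0 /* nr_requeue */, &mutex_futex, seq);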
@@ -2246,7 +2276,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
 		res = fixup_owner(uaddr2, fshared, &q, !ret);
 		/*
 		 * If fixup_owner() returned an error, propagate that. If it
-		 * acquired the lock, clear our -ETIMEDOUT or -EINTR.
+		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
 		 */
 		if (res)
 			ret = (res < 0) ? res : 0;
@@ -2302,9 +2332,9 @@ out:
  */
 
 /**
- * sys_set_robust_list - set the robust-futex list head of a task
+ * sys_set_robust_list() - Set the robust-futex list head of a task
  * @head:	pointer to the list-head
  * @len:	length of the list-head, as userspace expects
  */
 SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
 		size_t, len)
@@ -2323,10 +2353,10 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
 }
 
 /**
- * sys_get_robust_list - get the robust-futex list head of a task
+ * sys_get_robust_list() - Get the robust-futex list head of a task
  * @pid:	pid of the process [zero for current task]
  * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
  * @len_ptr:	pointer to a length field, the kernel fills in the header size
  */
 SYSCALL_DEFINE3(get_robust_list, int, pid,
 		struct robust_list_head __user * __user *, head_ptr,
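Both syscalls are thin accessors around the per-task robust_list pointer. A runnable userspace sketch that registers an empty robust list head and reads it back (glibc normally performs the registration once per thread at startup; doing it by hand here is purely illustrative):

#include <linux/futex.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        static struct robust_list_head head = {
                .list = { &head.list }, /* empty circular list */
                .futex_offset = 0,
                .list_op_pending = NULL,
        };
        struct robust_list_head *out;
        size_t len;

        if (syscall(SYS_set_robust_list, &head, sizeof(head)))
                perror("set_robust_list");

        /* pid 0 means "current task"; the kernel fills in both values. */
        if (syscall(SYS_get_robust_list, 0, &out, &len))
                perror("get_robust_list");
        else
                printf("head=%p len=%zu\n", (void *)out, len);
        return 0;
}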