diff options
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- | kernel/futex.c | 117 |
1 files changed, 72 insertions, 45 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index fb65e822fc41..e7a35f1039e7 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -203,8 +203,6 @@ static void drop_futex_key_refs(union futex_key *key) | |||
203 | * @uaddr: virtual address of the futex | 203 | * @uaddr: virtual address of the futex |
204 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED | 204 | * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED |
205 | * @key: address where result is stored. | 205 | * @key: address where result is stored. |
206 | * @rw: mapping needs to be read/write (values: VERIFY_READ, | ||
207 | * VERIFY_WRITE) | ||
208 | * | 206 | * |
209 | * Returns a negative error code or 0 | 207 | * Returns a negative error code or 0 |
210 | * The key words are stored in *key on success. | 208 | * The key words are stored in *key on success. |
@@ -216,7 +214,7 @@ static void drop_futex_key_refs(union futex_key *key) | |||
216 | * lock_page() might sleep, the caller should not hold a spinlock. | 214 | * lock_page() might sleep, the caller should not hold a spinlock. |
217 | */ | 215 | */ |
218 | static int | 216 | static int |
219 | get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) | 217 | get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) |
220 | { | 218 | { |
221 | unsigned long address = (unsigned long)uaddr; | 219 | unsigned long address = (unsigned long)uaddr; |
222 | struct mm_struct *mm = current->mm; | 220 | struct mm_struct *mm = current->mm; |
@@ -239,7 +237,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) | |||
239 | * but access_ok() should be faster than find_vma() | 237 | * but access_ok() should be faster than find_vma() |
240 | */ | 238 | */ |
241 | if (!fshared) { | 239 | if (!fshared) { |
242 | if (unlikely(!access_ok(rw, uaddr, sizeof(u32)))) | 240 | if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))) |
243 | return -EFAULT; | 241 | return -EFAULT; |
244 | key->private.mm = mm; | 242 | key->private.mm = mm; |
245 | key->private.address = address; | 243 | key->private.address = address; |
@@ -248,7 +246,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) | |||
248 | } | 246 | } |
249 | 247 | ||
250 | again: | 248 | again: |
251 | err = get_user_pages_fast(address, 1, rw == VERIFY_WRITE, &page); | 249 | err = get_user_pages_fast(address, 1, 1, &page); |
252 | if (err < 0) | 250 | if (err < 0) |
253 | return err; | 251 | return err; |
254 | 252 | ||
@@ -304,8 +302,14 @@ void put_futex_key(int fshared, union futex_key *key) | |||
304 | */ | 302 | */ |
305 | static int fault_in_user_writeable(u32 __user *uaddr) | 303 | static int fault_in_user_writeable(u32 __user *uaddr) |
306 | { | 304 | { |
307 | int ret = get_user_pages(current, current->mm, (unsigned long)uaddr, | 305 | struct mm_struct *mm = current->mm; |
308 | 1, 1, 0, NULL, NULL); | 306 | int ret; |
307 | |||
308 | down_read(&mm->mmap_sem); | ||
309 | ret = get_user_pages(current, mm, (unsigned long)uaddr, | ||
310 | 1, 1, 0, NULL, NULL); | ||
311 | up_read(&mm->mmap_sem); | ||
312 | |||
309 | return ret < 0 ? ret : 0; | 313 | return ret < 0 ? ret : 0; |
310 | } | 314 | } |
311 | 315 | ||
@@ -397,9 +401,9 @@ static void free_pi_state(struct futex_pi_state *pi_state) | |||
397 | * and has cleaned up the pi_state already | 401 | * and has cleaned up the pi_state already |
398 | */ | 402 | */ |
399 | if (pi_state->owner) { | 403 | if (pi_state->owner) { |
400 | spin_lock_irq(&pi_state->owner->pi_lock); | 404 | raw_spin_lock_irq(&pi_state->owner->pi_lock); |
401 | list_del_init(&pi_state->list); | 405 | list_del_init(&pi_state->list); |
402 | spin_unlock_irq(&pi_state->owner->pi_lock); | 406 | raw_spin_unlock_irq(&pi_state->owner->pi_lock); |
403 | 407 | ||
404 | rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); | 408 | rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); |
405 | } | 409 | } |
@@ -464,18 +468,18 @@ void exit_pi_state_list(struct task_struct *curr) | |||
464 | * pi_state_list anymore, but we have to be careful | 468 | * pi_state_list anymore, but we have to be careful |
465 | * versus waiters unqueueing themselves: | 469 | * versus waiters unqueueing themselves: |
466 | */ | 470 | */ |
467 | spin_lock_irq(&curr->pi_lock); | 471 | raw_spin_lock_irq(&curr->pi_lock); |
468 | while (!list_empty(head)) { | 472 | while (!list_empty(head)) { |
469 | 473 | ||
470 | next = head->next; | 474 | next = head->next; |
471 | pi_state = list_entry(next, struct futex_pi_state, list); | 475 | pi_state = list_entry(next, struct futex_pi_state, list); |
472 | key = pi_state->key; | 476 | key = pi_state->key; |
473 | hb = hash_futex(&key); | 477 | hb = hash_futex(&key); |
474 | spin_unlock_irq(&curr->pi_lock); | 478 | raw_spin_unlock_irq(&curr->pi_lock); |
475 | 479 | ||
476 | spin_lock(&hb->lock); | 480 | spin_lock(&hb->lock); |
477 | 481 | ||
478 | spin_lock_irq(&curr->pi_lock); | 482 | raw_spin_lock_irq(&curr->pi_lock); |
479 | /* | 483 | /* |
480 | * We dropped the pi-lock, so re-check whether this | 484 | * We dropped the pi-lock, so re-check whether this |
481 | * task still owns the PI-state: | 485 | * task still owns the PI-state: |
@@ -489,15 +493,15 @@ void exit_pi_state_list(struct task_struct *curr) | |||
489 | WARN_ON(list_empty(&pi_state->list)); | 493 | WARN_ON(list_empty(&pi_state->list)); |
490 | list_del_init(&pi_state->list); | 494 | list_del_init(&pi_state->list); |
491 | pi_state->owner = NULL; | 495 | pi_state->owner = NULL; |
492 | spin_unlock_irq(&curr->pi_lock); | 496 | raw_spin_unlock_irq(&curr->pi_lock); |
493 | 497 | ||
494 | rt_mutex_unlock(&pi_state->pi_mutex); | 498 | rt_mutex_unlock(&pi_state->pi_mutex); |
495 | 499 | ||
496 | spin_unlock(&hb->lock); | 500 | spin_unlock(&hb->lock); |
497 | 501 | ||
498 | spin_lock_irq(&curr->pi_lock); | 502 | raw_spin_lock_irq(&curr->pi_lock); |
499 | } | 503 | } |
500 | spin_unlock_irq(&curr->pi_lock); | 504 | raw_spin_unlock_irq(&curr->pi_lock); |
501 | } | 505 | } |
502 | 506 | ||
503 | static int | 507 | static int |
@@ -526,8 +530,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
526 | return -EINVAL; | 530 | return -EINVAL; |
527 | 531 | ||
528 | WARN_ON(!atomic_read(&pi_state->refcount)); | 532 | WARN_ON(!atomic_read(&pi_state->refcount)); |
529 | WARN_ON(pid && pi_state->owner && | 533 | |
530 | pi_state->owner->pid != pid); | 534 | /* |
535 | * When pi_state->owner is NULL then the owner died | ||
536 | * and another waiter is on the fly. pi_state->owner | ||
537 | * is fixed up by the task which acquires | ||
538 | * pi_state->rt_mutex. | ||
539 | * | ||
540 | * We do not check for pid == 0 which can happen when | ||
541 | * the owner died and robust_list_exit() cleared the | ||
542 | * TID. | ||
543 | */ | ||
544 | if (pid && pi_state->owner) { | ||
545 | /* | ||
546 | * Bail out if user space manipulated the | ||
547 | * futex value. | ||
548 | */ | ||
549 | if (pid != task_pid_vnr(pi_state->owner)) | ||
550 | return -EINVAL; | ||
551 | } | ||
531 | 552 | ||
532 | atomic_inc(&pi_state->refcount); | 553 | atomic_inc(&pi_state->refcount); |
533 | *ps = pi_state; | 554 | *ps = pi_state; |
@@ -552,7 +573,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
552 | * change of the task flags, we do this protected by | 573 | * change of the task flags, we do this protected by |
553 | * p->pi_lock: | 574 | * p->pi_lock: |
554 | */ | 575 | */ |
555 | spin_lock_irq(&p->pi_lock); | 576 | raw_spin_lock_irq(&p->pi_lock); |
556 | if (unlikely(p->flags & PF_EXITING)) { | 577 | if (unlikely(p->flags & PF_EXITING)) { |
557 | /* | 578 | /* |
558 | * The task is on the way out. When PF_EXITPIDONE is | 579 | * The task is on the way out. When PF_EXITPIDONE is |
@@ -561,7 +582,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
561 | */ | 582 | */ |
562 | int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN; | 583 | int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN; |
563 | 584 | ||
564 | spin_unlock_irq(&p->pi_lock); | 585 | raw_spin_unlock_irq(&p->pi_lock); |
565 | put_task_struct(p); | 586 | put_task_struct(p); |
566 | return ret; | 587 | return ret; |
567 | } | 588 | } |
@@ -580,7 +601,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |||
580 | WARN_ON(!list_empty(&pi_state->list)); | 601 | WARN_ON(!list_empty(&pi_state->list)); |
581 | list_add(&pi_state->list, &p->pi_state_list); | 602 | list_add(&pi_state->list, &p->pi_state_list); |
582 | pi_state->owner = p; | 603 | pi_state->owner = p; |
583 | spin_unlock_irq(&p->pi_lock); | 604 | raw_spin_unlock_irq(&p->pi_lock); |
584 | 605 | ||
585 | put_task_struct(p); | 606 | put_task_struct(p); |
586 | 607 | ||
@@ -754,7 +775,14 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
754 | if (!pi_state) | 775 | if (!pi_state) |
755 | return -EINVAL; | 776 | return -EINVAL; |
756 | 777 | ||
757 | spin_lock(&pi_state->pi_mutex.wait_lock); | 778 | /* |
779 | * If current does not own the pi_state then the futex is | ||
780 | * inconsistent and user space fiddled with the futex value. | ||
781 | */ | ||
782 | if (pi_state->owner != current) | ||
783 | return -EINVAL; | ||
784 | |||
785 | raw_spin_lock(&pi_state->pi_mutex.wait_lock); | ||
758 | new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); | 786 | new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); |
759 | 787 | ||
760 | /* | 788 | /* |
@@ -783,23 +811,23 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
783 | else if (curval != uval) | 811 | else if (curval != uval) |
784 | ret = -EINVAL; | 812 | ret = -EINVAL; |
785 | if (ret) { | 813 | if (ret) { |
786 | spin_unlock(&pi_state->pi_mutex.wait_lock); | 814 | raw_spin_unlock(&pi_state->pi_mutex.wait_lock); |
787 | return ret; | 815 | return ret; |
788 | } | 816 | } |
789 | } | 817 | } |
790 | 818 | ||
791 | spin_lock_irq(&pi_state->owner->pi_lock); | 819 | raw_spin_lock_irq(&pi_state->owner->pi_lock); |
792 | WARN_ON(list_empty(&pi_state->list)); | 820 | WARN_ON(list_empty(&pi_state->list)); |
793 | list_del_init(&pi_state->list); | 821 | list_del_init(&pi_state->list); |
794 | spin_unlock_irq(&pi_state->owner->pi_lock); | 822 | raw_spin_unlock_irq(&pi_state->owner->pi_lock); |
795 | 823 | ||
796 | spin_lock_irq(&new_owner->pi_lock); | 824 | raw_spin_lock_irq(&new_owner->pi_lock); |
797 | WARN_ON(!list_empty(&pi_state->list)); | 825 | WARN_ON(!list_empty(&pi_state->list)); |
798 | list_add(&pi_state->list, &new_owner->pi_state_list); | 826 | list_add(&pi_state->list, &new_owner->pi_state_list); |
799 | pi_state->owner = new_owner; | 827 | pi_state->owner = new_owner; |
800 | spin_unlock_irq(&new_owner->pi_lock); | 828 | raw_spin_unlock_irq(&new_owner->pi_lock); |
801 | 829 | ||
802 | spin_unlock(&pi_state->pi_mutex.wait_lock); | 830 | raw_spin_unlock(&pi_state->pi_mutex.wait_lock); |
803 | rt_mutex_unlock(&pi_state->pi_mutex); | 831 | rt_mutex_unlock(&pi_state->pi_mutex); |
804 | 832 | ||
805 | return 0; | 833 | return 0; |
@@ -861,7 +889,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) | |||
861 | if (!bitset) | 889 | if (!bitset) |
862 | return -EINVAL; | 890 | return -EINVAL; |
863 | 891 | ||
864 | ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ); | 892 | ret = get_futex_key(uaddr, fshared, &key); |
865 | if (unlikely(ret != 0)) | 893 | if (unlikely(ret != 0)) |
866 | goto out; | 894 | goto out; |
867 | 895 | ||
@@ -907,10 +935,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, | |||
907 | int ret, op_ret; | 935 | int ret, op_ret; |
908 | 936 | ||
909 | retry: | 937 | retry: |
910 | ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); | 938 | ret = get_futex_key(uaddr1, fshared, &key1); |
911 | if (unlikely(ret != 0)) | 939 | if (unlikely(ret != 0)) |
912 | goto out; | 940 | goto out; |
913 | ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); | 941 | ret = get_futex_key(uaddr2, fshared, &key2); |
914 | if (unlikely(ret != 0)) | 942 | if (unlikely(ret != 0)) |
915 | goto out_put_key1; | 943 | goto out_put_key1; |
916 | 944 | ||
@@ -1004,7 +1032,7 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, | |||
1004 | plist_add(&q->list, &hb2->chain); | 1032 | plist_add(&q->list, &hb2->chain); |
1005 | q->lock_ptr = &hb2->lock; | 1033 | q->lock_ptr = &hb2->lock; |
1006 | #ifdef CONFIG_DEBUG_PI_LIST | 1034 | #ifdef CONFIG_DEBUG_PI_LIST |
1007 | q->list.plist.lock = &hb2->lock; | 1035 | q->list.plist.spinlock = &hb2->lock; |
1008 | #endif | 1036 | #endif |
1009 | } | 1037 | } |
1010 | get_futex_key_refs(key2); | 1038 | get_futex_key_refs(key2); |
@@ -1040,7 +1068,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, | |||
1040 | 1068 | ||
1041 | q->lock_ptr = &hb->lock; | 1069 | q->lock_ptr = &hb->lock; |
1042 | #ifdef CONFIG_DEBUG_PI_LIST | 1070 | #ifdef CONFIG_DEBUG_PI_LIST |
1043 | q->list.plist.lock = &hb->lock; | 1071 | q->list.plist.spinlock = &hb->lock; |
1044 | #endif | 1072 | #endif |
1045 | 1073 | ||
1046 | wake_up_state(q->task, TASK_NORMAL); | 1074 | wake_up_state(q->task, TASK_NORMAL); |
@@ -1169,11 +1197,10 @@ retry: | |||
1169 | pi_state = NULL; | 1197 | pi_state = NULL; |
1170 | } | 1198 | } |
1171 | 1199 | ||
1172 | ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); | 1200 | ret = get_futex_key(uaddr1, fshared, &key1); |
1173 | if (unlikely(ret != 0)) | 1201 | if (unlikely(ret != 0)) |
1174 | goto out; | 1202 | goto out; |
1175 | ret = get_futex_key(uaddr2, fshared, &key2, | 1203 | ret = get_futex_key(uaddr2, fshared, &key2); |
1176 | requeue_pi ? VERIFY_WRITE : VERIFY_READ); | ||
1177 | if (unlikely(ret != 0)) | 1204 | if (unlikely(ret != 0)) |
1178 | goto out_put_key1; | 1205 | goto out_put_key1; |
1179 | 1206 | ||
@@ -1388,7 +1415,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | |||
1388 | 1415 | ||
1389 | plist_node_init(&q->list, prio); | 1416 | plist_node_init(&q->list, prio); |
1390 | #ifdef CONFIG_DEBUG_PI_LIST | 1417 | #ifdef CONFIG_DEBUG_PI_LIST |
1391 | q->list.plist.lock = &hb->lock; | 1418 | q->list.plist.spinlock = &hb->lock; |
1392 | #endif | 1419 | #endif |
1393 | plist_add(&q->list, &hb->chain); | 1420 | plist_add(&q->list, &hb->chain); |
1394 | q->task = current; | 1421 | q->task = current; |
@@ -1523,18 +1550,18 @@ retry: | |||
1523 | * itself. | 1550 | * itself. |
1524 | */ | 1551 | */ |
1525 | if (pi_state->owner != NULL) { | 1552 | if (pi_state->owner != NULL) { |
1526 | spin_lock_irq(&pi_state->owner->pi_lock); | 1553 | raw_spin_lock_irq(&pi_state->owner->pi_lock); |
1527 | WARN_ON(list_empty(&pi_state->list)); | 1554 | WARN_ON(list_empty(&pi_state->list)); |
1528 | list_del_init(&pi_state->list); | 1555 | list_del_init(&pi_state->list); |
1529 | spin_unlock_irq(&pi_state->owner->pi_lock); | 1556 | raw_spin_unlock_irq(&pi_state->owner->pi_lock); |
1530 | } | 1557 | } |
1531 | 1558 | ||
1532 | pi_state->owner = newowner; | 1559 | pi_state->owner = newowner; |
1533 | 1560 | ||
1534 | spin_lock_irq(&newowner->pi_lock); | 1561 | raw_spin_lock_irq(&newowner->pi_lock); |
1535 | WARN_ON(!list_empty(&pi_state->list)); | 1562 | WARN_ON(!list_empty(&pi_state->list)); |
1536 | list_add(&pi_state->list, &newowner->pi_state_list); | 1563 | list_add(&pi_state->list, &newowner->pi_state_list); |
1537 | spin_unlock_irq(&newowner->pi_lock); | 1564 | raw_spin_unlock_irq(&newowner->pi_lock); |
1538 | return 0; | 1565 | return 0; |
1539 | 1566 | ||
1540 | /* | 1567 | /* |
@@ -1732,7 +1759,7 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, | |||
1732 | */ | 1759 | */ |
1733 | retry: | 1760 | retry: |
1734 | q->key = FUTEX_KEY_INIT; | 1761 | q->key = FUTEX_KEY_INIT; |
1735 | ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ); | 1762 | ret = get_futex_key(uaddr, fshared, &q->key); |
1736 | if (unlikely(ret != 0)) | 1763 | if (unlikely(ret != 0)) |
1737 | return ret; | 1764 | return ret; |
1738 | 1765 | ||
@@ -1898,7 +1925,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, | |||
1898 | q.requeue_pi_key = NULL; | 1925 | q.requeue_pi_key = NULL; |
1899 | retry: | 1926 | retry: |
1900 | q.key = FUTEX_KEY_INIT; | 1927 | q.key = FUTEX_KEY_INIT; |
1901 | ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); | 1928 | ret = get_futex_key(uaddr, fshared, &q.key); |
1902 | if (unlikely(ret != 0)) | 1929 | if (unlikely(ret != 0)) |
1903 | goto out; | 1930 | goto out; |
1904 | 1931 | ||
@@ -1968,7 +1995,7 @@ retry_private: | |||
1968 | /* Unqueue and drop the lock */ | 1995 | /* Unqueue and drop the lock */ |
1969 | unqueue_me_pi(&q); | 1996 | unqueue_me_pi(&q); |
1970 | 1997 | ||
1971 | goto out; | 1998 | goto out_put_key; |
1972 | 1999 | ||
1973 | out_unlock_put_key: | 2000 | out_unlock_put_key: |
1974 | queue_unlock(&q, hb); | 2001 | queue_unlock(&q, hb); |
@@ -2017,7 +2044,7 @@ retry: | |||
2017 | if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) | 2044 | if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) |
2018 | return -EPERM; | 2045 | return -EPERM; |
2019 | 2046 | ||
2020 | ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE); | 2047 | ret = get_futex_key(uaddr, fshared, &key); |
2021 | if (unlikely(ret != 0)) | 2048 | if (unlikely(ret != 0)) |
2022 | goto out; | 2049 | goto out; |
2023 | 2050 | ||
@@ -2209,7 +2236,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2209 | rt_waiter.task = NULL; | 2236 | rt_waiter.task = NULL; |
2210 | 2237 | ||
2211 | key2 = FUTEX_KEY_INIT; | 2238 | key2 = FUTEX_KEY_INIT; |
2212 | ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); | 2239 | ret = get_futex_key(uaddr2, fshared, &key2); |
2213 | if (unlikely(ret != 0)) | 2240 | if (unlikely(ret != 0)) |
2214 | goto out; | 2241 | goto out; |
2215 | 2242 | ||