Diffstat (limited to 'kernel/futex.c')

-rw-r--r--	kernel/futex.c	524
1 file changed, 283 insertions, 241 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 6a3a5fa1526d..fe28dc282eae 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -69,6 +69,14 @@ int __read_mostly futex_cmpxchg_enabled;
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
 /*
+ * Futex flags used to encode options to functions and preserve them across
+ * restarts.
+ */
+#define FLAGS_SHARED		0x01
+#define FLAGS_CLOCKRT		0x02
+#define FLAGS_HAS_TIMEOUT	0x04
+
+/*
  * Priority Inheritance state:
  */
 struct futex_pi_state {
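The three flag bits above replace the separate fshared/clockrt ints that the functions below used to pass around. As a rough sketch of the intended use, assuming the usual FUTEX_PRIVATE_FLAG and FUTEX_CLOCK_REALTIME op modifiers (the real decoding happens once in do_futex() and is not shown in this diff):

/* Sketch only: fold the userspace op word into the internal flags word
 * once, at the syscall boundary, then pass/stash the single word. */
static unsigned int futex_op_to_flags(int op)
{
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;	/* key by inode, not by mm */
	if (op & FUTEX_CLOCK_REALTIME)
		flags |= FLAGS_CLOCKRT;	/* absolute CLOCK_REALTIME timeout */

	return flags;
}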
@@ -91,6 +99,7 @@ struct futex_pi_state {
 
 /**
  * struct futex_q - The hashed futex queue entry, one per waiting task
+ * @list:		priority-sorted list of tasks waiting on this futex
  * @task:		the task waiting on the futex
  * @lock_ptr:		the hash bucket lock
  * @key:		the key the futex is hashed on
@@ -104,7 +113,7 @@ struct futex_pi_state {
  *
  * A futex_q has a woken state, just like tasks have TASK_RUNNING.
  * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
- * The order of wakup is always to make the first condition true, then
+ * The order of wakeup is always to make the first condition true, then
  * the second.
  *
  * PI futexes are typically woken before they are removed from the hash list via
@@ -122,6 +131,12 @@ struct futex_q {
 	u32 bitset;
 };
 
+static const struct futex_q futex_q_init = {
+	/* list gets initialized in queue_me()*/
+	.key = FUTEX_KEY_INIT,
+	.bitset = FUTEX_BITSET_MATCH_ANY
+};
+
 /*
  * Hash buckets are shared by all the futex_keys that hash to the same
  * location. Each key may have multiple futex_q structures, one for each task
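One effect of the template: the on-stack futex_q declarations later in this diff shrink to a single line, and C initializer semantics zero every field the template does not name (pi_state, rt_waiter, requeue_pi_key and so on). For example:

	/* instead of declaring q and then NULLing fields one by one: */
	struct futex_q q = futex_q_init;

	/* only the per-call fields remain to be set, e.g. in futex_wait(): */
	q.bitset = bitset;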
@@ -168,7 +183,7 @@ static void get_futex_key_refs(union futex_key *key)
 
 	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
 	case FUT_OFF_INODE:
-		atomic_inc(&key->shared.inode->i_count);
+		ihold(key->shared.inode);
 		break;
 	case FUT_OFF_MMSHARED:
 		atomic_inc(&key->private.mm->mm_count);
@@ -218,7 +233,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
-	struct page *page;
+	struct page *page, *page_head;
 	int err;
 
 	/*
@@ -250,11 +265,46 @@ again:
 	if (err < 0)
 		return err;
 
-	page = compound_head(page);
-	lock_page(page);
-	if (!page->mapping) {
-		unlock_page(page);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	page_head = page;
+	if (unlikely(PageTail(page))) {
 		put_page(page);
+		/* serialize against __split_huge_page_splitting() */
+		local_irq_disable();
+		if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
+			page_head = compound_head(page);
+			/*
+			 * page_head is valid pointer but we must pin
+			 * it before taking the PG_lock and/or
+			 * PG_compound_lock. The moment we re-enable
+			 * irqs __split_huge_page_splitting() can
+			 * return and the head page can be freed from
+			 * under us. We can't take the PG_lock and/or
+			 * PG_compound_lock on a page that could be
+			 * freed from under us.
+			 */
+			if (page != page_head) {
+				get_page(page_head);
+				put_page(page);
+			}
+			local_irq_enable();
+		} else {
+			local_irq_enable();
+			goto again;
+		}
+	}
+#else
+	page_head = compound_head(page);
+	if (page != page_head) {
+		get_page(page_head);
+		put_page(page);
+	}
+#endif
+
+	lock_page(page_head);
+	if (!page_head->mapping) {
+		unlock_page(page_head);
+		put_page(page_head);
 		goto again;
 	}
 
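A condensed restatement of the pinning protocol above, as a hypothetical helper (the name is invented for illustration): while interrupts are disabled, __split_huge_page_splitting() cannot complete against this mm, so compound_head() of a tail page stays valid long enough to take a reference on the head before irqs are re-enabled.

/* Hypothetical helper mirroring the CONFIG_TRANSPARENT_HUGEPAGE branch
 * above; returns the pinned head page, or NULL to ask the caller to
 * retry the fast GUP. */
static struct page *futex_pin_page_head(unsigned long address,
					struct page *page)
{
	struct page *page_head = page;

	if (PageTail(page)) {
		put_page(page);
		local_irq_disable();	/* hold off the split */
		if (__get_user_pages_fast(address, 1, 1, &page) == 1) {
			page_head = compound_head(page);
			if (page != page_head) {
				get_page(page_head);	/* pin the head... */
				put_page(page);		/* ...drop the tail */
			}
			local_irq_enable();
		} else {
			local_irq_enable();
			page_head = NULL;	/* caller must retry */
		}
	}
	return page_head;
}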
@@ -265,25 +315,24 @@ again:
 	 * it's a read-only handle, it's expected that futexes attach to
 	 * the object not the particular process.
 	 */
-	if (PageAnon(page)) {
+	if (PageAnon(page_head)) {
 		key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
 		key->private.mm = mm;
 		key->private.address = address;
 	} else {
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-		key->shared.inode = page->mapping->host;
-		key->shared.pgoff = page->index;
+		key->shared.inode = page_head->mapping->host;
+		key->shared.pgoff = page_head->index;
 	}
 
 	get_futex_key_refs(key);
 
-	unlock_page(page);
-	put_page(page);
+	unlock_page(page_head);
+	put_page(page_head);
 	return 0;
 }
 
-static inline
-void put_futex_key(int fshared, union futex_key *key)
+static inline void put_futex_key(union futex_key *key)
 {
 	drop_futex_key_refs(key);
 }
@@ -295,7 +344,7 @@ void put_futex_key(int fshared, union futex_key *key)
  * Slow path to fixup the fault we just took in the atomic write
  * access to @uaddr.
  *
- * We have no generic implementation of a non destructive write to the
+ * We have no generic implementation of a non-destructive write to the
  * user address. We know that we faulted in the atomic pagefault
 * disabled section so we can as well avoid the #PF overhead by
 * calling get_user_pages() right away.
@@ -332,15 +381,16 @@ static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
 	return NULL;
 }
 
-static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
+static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
+				      u32 uval, u32 newval)
 {
-	u32 curval;
+	int ret;
 
 	pagefault_disable();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
 	pagefault_enable();
 
-	return curval;
+	return ret;
 }
 
 static int get_futex_value_locked(u32 *dest, u32 __user *from)
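The point of the new signature is visible at every call site converted below: the old API returned the current futex value in-band, so -EFAULT (0xfffffff2 as a u32) was indistinguishable from a futex that legitimately held that value. Caller pattern, old versus new:

	/* old: fault status and futex value share one return channel */
	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
	if (curval == -EFAULT)		/* ambiguous! */
		return -EFAULT;

	/* new: non-zero return means fault; *curval is only ever a value */
	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
		return -EFAULT;
	if (curval != uval)
		goto retry;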
@@ -515,7 +565,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
 			 */
 			pi_state = this->pi_state;
 			/*
-			 * Userspace might have messed up non PI and PI futexes
+			 * Userspace might have messed up non-PI and PI futexes
 			 */
 			if (unlikely(!pi_state))
 				return -EINVAL;
@@ -625,7 +675,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
 				struct task_struct *task, int set_waiters)
 {
 	int lock_taken, ret, ownerdied = 0;
-	u32 uval, newval, curval;
+	u32 uval, newval, curval, vpid = task_pid_vnr(task);
 
 retry:
 	ret = lock_taken = 0;
@@ -635,19 +685,17 @@ retry:
 	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
 	 * the locks. It will most likely not succeed.
 	 */
-	newval = task_pid_vnr(task);
+	newval = vpid;
 	if (set_waiters)
 		newval |= FUTEX_WAITERS;
 
-	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
-
-	if (unlikely(curval == -EFAULT))
+	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
 		return -EFAULT;
 
 	/*
 	 * Detect deadlocks.
 	 */
-	if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task))))
+	if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
 		return -EDEADLK;
 
 	/*
@@ -674,14 +722,12 @@ retry:
 	 */
 	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
 		/* Keep the OWNER_DIED bit */
-		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task);
+		newval = (curval & ~FUTEX_TID_MASK) | vpid;
 		ownerdied = 0;
 		lock_taken = 1;
 	}
 
-	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
-	if (unlikely(curval == -EFAULT))
+	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
 		return -EFAULT;
 	if (unlikely(curval != uval))
 		goto retry;
@@ -726,6 +772,24 @@ retry:
 	return ret;
 }
 
+/**
+ * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
+ * @q:	The futex_q to unqueue
+ *
+ * The q->lock_ptr must not be NULL and must be held by the caller.
+ */
+static void __unqueue_futex(struct futex_q *q)
+{
+	struct futex_hash_bucket *hb;
+
+	if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
+	    || WARN_ON(plist_node_empty(&q->list)))
+		return;
+
+	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
+	plist_del(&q->list, &hb->chain);
+}
+
 /*
  * The hash bucket lock must be held when this is called.
  * Afterwards, the futex_q must not be accessed.
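__unqueue_futex() can recover the hash bucket from the stored lock pointer because lock is a member of struct futex_hash_bucket, and container_of() simply subtracts the member offset. In isolation, with the bucket layout used by this file:

struct futex_hash_bucket {
	spinlock_t lock;
	struct plist_head chain;
};

/* given q->lock_ptr == &hb->lock for some bucket hb: */
struct futex_hash_bucket *hb =
	container_of(q->lock_ptr, struct futex_hash_bucket, lock);
/* now &hb->chain is the plist the futex_q is actually queued on */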
@@ -736,14 +800,14 @@ static void wake_futex(struct futex_q *q)
 
 	/*
 	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
-	 * a non futex wake up happens on another CPU then the task
-	 * might exit and p would dereference a non existing task
+	 * a non-futex wake up happens on another CPU then the task
+	 * might exit and p would dereference a non-existing task
 	 * struct. Prevent this by holding a reference on p across the
 	 * wake up.
 	 */
 	get_task_struct(p);
 
-	plist_del(&q->list, &q->list.plist);
+	__unqueue_futex(q);
 	/*
 	 * The waiting task can free the futex_q as soon as
 	 * q->lock_ptr = NULL is written, without taking any locks. A
@@ -777,10 +841,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
-	 * This happens when we have stolen the lock and the original
-	 * pending owner did not enqueue itself back on the rt_mutex.
-	 * Thats not a tragedy. We know that way, that a lock waiter
-	 * is on the fly. We make the futex_q waiter the pending owner.
+	 * It is possible that the next waiter (the one that brought
+	 * this owner to the kernel) timed out and is no longer
+	 * waiting on the lock.
 	 */
 	if (!new_owner)
 		new_owner = this->task;
@@ -795,9 +858,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 
 	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
 
-	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
-	if (curval == -EFAULT)
+	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
 		ret = -EFAULT;
 	else if (curval != uval)
 		ret = -EINVAL;
@@ -832,10 +893,8 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
 	 * There is no waiter, so we unlock the futex. The owner died
 	 * bit has not to be preserved here. We are the owner:
 	 */
-	oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
-
-	if (oldval == -EFAULT)
-		return oldval;
+	if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
+		return -EFAULT;
 	if (oldval != uval)
 		return -EAGAIN;
 
@@ -869,7 +928,8 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
 /*
  * Wake up waiters matching bitset queued on this futex (uaddr).
  */
-static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
+static int
+futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
@@ -880,7 +940,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
 	if (!bitset)
 		return -EINVAL;
 
-	ret = get_futex_key(uaddr, fshared, &key);
+	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
 	if (unlikely(ret != 0))
 		goto out;
 
@@ -906,7 +966,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
 	}
 
 	spin_unlock(&hb->lock);
-	put_futex_key(fshared, &key);
+	put_futex_key(&key);
 out:
 	return ret;
 }
@@ -916,7 +976,7 @@ out:
  * to this virtual address:
  */
 static int
-futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
+futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
 	      int nr_wake, int nr_wake2, int op)
 {
 	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
@@ -926,10 +986,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
 	int ret, op_ret;
 
 retry:
-	ret = get_futex_key(uaddr1, fshared, &key1);
+	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
 	if (unlikely(ret != 0))
 		goto out;
-	ret = get_futex_key(uaddr2, fshared, &key2);
+	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
 	if (unlikely(ret != 0))
 		goto out_put_key1;
 
@@ -961,11 +1021,11 @@ retry_private:
 		if (ret)
 			goto out_put_keys;
 
-		if (!fshared)
+		if (!(flags & FLAGS_SHARED))
 			goto retry_private;
 
-		put_futex_key(fshared, &key2);
-		put_futex_key(fshared, &key1);
+		put_futex_key(&key2);
+		put_futex_key(&key1);
 		goto retry;
 	}
 
@@ -995,9 +1055,9 @@ retry_private:
 
 	double_unlock_hb(hb1, hb2);
 out_put_keys:
-	put_futex_key(fshared, &key2);
+	put_futex_key(&key2);
 out_put_key1:
-	put_futex_key(fshared, &key1);
+	put_futex_key(&key1);
 out:
 	return ret;
 }
@@ -1022,9 +1082,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 		plist_del(&q->list, &hb1->chain);
 		plist_add(&q->list, &hb2->chain);
 		q->lock_ptr = &hb2->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
-		q->list.plist.spinlock = &hb2->lock;
-#endif
 	}
 	get_futex_key_refs(key2);
 	q->key = *key2;
@@ -1051,16 +1108,12 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 	get_futex_key_refs(key);
 	q->key = *key;
 
-	WARN_ON(plist_node_empty(&q->list));
-	plist_del(&q->list, &q->list.plist);
+	__unqueue_futex(q);
 
 	WARN_ON(!q->rt_waiter);
 	q->rt_waiter = NULL;
 
 	q->lock_ptr = &hb->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.spinlock = &hb->lock;
-#endif
 
 	wake_up_state(q->task, TASK_NORMAL);
 }
@@ -1131,12 +1184,14 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 
 /**
  * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
- * uaddr1:	source futex user address
- * uaddr2:	target futex user address
- * nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
- * nr_requeue:	number of waiters to requeue (0-INT_MAX)
- * requeue_pi:	if we are attempting to requeue from a non-pi futex to a
- *		pi futex (pi to pi requeue is not supported)
+ * @uaddr1:	source futex user address
+ * @flags:	futex flags (FLAGS_SHARED, etc.)
+ * @uaddr2:	target futex user address
+ * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
+ * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
+ * @cmpval:	@uaddr1 expected value (or %NULL)
+ * @requeue_pi:	if we are attempting to requeue from a non-pi futex to a
+ *		pi futex (pi to pi requeue is not supported)
  *
  * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
  * uaddr2 atomically on behalf of the top waiter.
@@ -1145,9 +1200,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
  * >=0 - on success, the number of tasks requeued or woken
  * <0 - on error
  */
-static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
-			 int nr_wake, int nr_requeue, u32 *cmpval,
-			 int requeue_pi)
+static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
+			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
+			 u32 *cmpval, int requeue_pi)
 {
 	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
 	int drop_count = 0, task_count = 0, ret;
@@ -1188,10 +1243,10 @@ retry:
 		pi_state = NULL;
 	}
 
-	ret = get_futex_key(uaddr1, fshared, &key1);
+	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
 	if (unlikely(ret != 0))
 		goto out;
-	ret = get_futex_key(uaddr2, fshared, &key2);
+	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
 	if (unlikely(ret != 0))
 		goto out_put_key1;
 
@@ -1213,11 +1268,11 @@ retry_private:
 			if (ret)
 				goto out_put_keys;
 
-			if (!fshared)
+			if (!(flags & FLAGS_SHARED))
 				goto retry_private;
 
-			put_futex_key(fshared, &key2);
-			put_futex_key(fshared, &key1);
+			put_futex_key(&key2);
+			put_futex_key(&key1);
 			goto retry;
 		}
 		if (curval != *cmpval) {
@@ -1257,8 +1312,8 @@ retry_private:
 			break;
 		case -EFAULT:
 			double_unlock_hb(hb1, hb2);
-			put_futex_key(fshared, &key2);
-			put_futex_key(fshared, &key1);
+			put_futex_key(&key2);
+			put_futex_key(&key1);
 			ret = fault_in_user_writeable(uaddr2);
 			if (!ret)
 				goto retry;
@@ -1266,8 +1321,8 @@ retry_private:
 		case -EAGAIN:
 			/* The owner was exiting, try again. */
 			double_unlock_hb(hb1, hb2);
-			put_futex_key(fshared, &key2);
-			put_futex_key(fshared, &key1);
+			put_futex_key(&key2);
+			put_futex_key(&key1);
 			cond_resched();
 			goto retry;
 		default:
@@ -1349,9 +1404,9 @@ out_unlock:
 		drop_futex_key_refs(&key1);
 
 out_put_keys:
-	put_futex_key(fshared, &key2);
+	put_futex_key(&key2);
 out_put_key1:
-	put_futex_key(fshared, &key1);
+	put_futex_key(&key1);
 out:
 	if (pi_state != NULL)
 		free_pi_state(pi_state);
@@ -1360,10 +1415,10 @@ out:
 
 /* The key must be already stored in q->key. */
 static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
+	__acquires(&hb->lock)
 {
 	struct futex_hash_bucket *hb;
 
-	get_futex_key_refs(&q->key);
 	hb = hash_futex(&q->key);
 	q->lock_ptr = &hb->lock;
 
@@ -1373,9 +1428,9 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
 
 static inline void
 queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
+	__releases(&hb->lock)
 {
 	spin_unlock(&hb->lock);
-	drop_futex_key_refs(&q->key);
 }
 
 /**
@@ -1391,6 +1446,7 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
  * an example).
  */
 static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
+	__releases(&hb->lock)
 {
 	int prio;
 
@@ -1405,9 +1461,6 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 	prio = min(current->normal_prio, MAX_RT_PRIO);
 
 	plist_node_init(&q->list, prio);
-#ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.spinlock = &hb->lock;
-#endif
 	plist_add(&q->list, &hb->chain);
 	q->task = current;
 	spin_unlock(&hb->lock);
@@ -1452,8 +1505,7 @@ retry:
 			spin_unlock(lock_ptr);
 			goto retry;
 		}
-		WARN_ON(plist_node_empty(&q->list));
-		plist_del(&q->list, &q->list.plist);
+		__unqueue_futex(q);
 
 		BUG_ON(q->pi_state);
 
@@ -1471,17 +1523,15 @@ retry:
 * and dropped here.
 */
 static void unqueue_me_pi(struct futex_q *q)
+	__releases(q->lock_ptr)
 {
-	WARN_ON(plist_node_empty(&q->list));
-	plist_del(&q->list, &q->list.plist);
+	__unqueue_futex(q);
 
 	BUG_ON(!q->pi_state);
 	free_pi_state(q->pi_state);
 	q->pi_state = NULL;
 
 	spin_unlock(q->lock_ptr);
-
-	drop_futex_key_refs(&q->key);
 }
 
 /*
@@ -1491,7 +1541,7 @@ static void unqueue_me_pi(struct futex_q *q)
 * private futexes.
 */
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-				struct task_struct *newowner, int fshared)
+				struct task_struct *newowner)
 {
 	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
 	struct futex_pi_state *pi_state = q->pi_state;
@@ -1505,10 +1555,10 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 
 	/*
 	 * We are here either because we stole the rtmutex from the
-	 * pending owner or we are the pending owner which failed to
-	 * get the rtmutex. We have to replace the pending owner TID
-	 * in the user space variable. This must be atomic as we have
-	 * to preserve the owner died bit here.
+	 * previous highest priority waiter or we are the highest priority
+	 * waiter but failed to get the rtmutex the first time.
+	 * We have to replace the newowner TID in the user space variable.
+	 * This must be atomic as we have to preserve the owner died bit here.
 	 *
 	 * Note: We write the user space value _before_ changing the pi_state
 	 * because we can fault here. Imagine swapped out pages or a fork
@@ -1527,9 +1577,7 @@ retry:
 	while (1) {
 		newval = (uval & FUTEX_OWNER_DIED) | newtid;
 
-		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
-		if (curval == -EFAULT)
+		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
 			goto handle_fault;
 		if (curval == uval)
 			break;
@@ -1557,8 +1605,8 @@ retry:
 
 	/*
 	 * To handle the page fault we need to drop the hash bucket
-	 * lock here. That gives the other task (either the pending
-	 * owner itself or the task which stole the rtmutex) the
+	 * lock here. That gives the other task (either the highest priority
+	 * waiter itself or the task which stole the rtmutex) the
 	 * chance to try the fixup of the pi_state. So once we are
 	 * back from handling the fault we need to check the pi_state
 	 * after reacquiring the hash bucket lock and before trying to
@@ -1584,20 +1632,11 @@ handle_fault:
 	goto retry;
 }
 
-/*
- * In case we must use restart_block to restart a futex_wait,
- * we encode in the 'flags' shared capability
- */
-#define FLAGS_SHARED		0x01
-#define FLAGS_CLOCKRT		0x02
-#define FLAGS_HAS_TIMEOUT	0x04
-
 static long futex_wait_restart(struct restart_block *restart);
 
 /**
  * fixup_owner() - Post lock pi_state and corner case management
  * @uaddr:	user address of the futex
- * @fshared:	whether the futex is shared (1) or not (0)
  * @q:		futex_q (contains pi_state and access to the rt_mutex)
  * @locked:	if the attempt to take the rt_mutex succeeded (1) or not (0)
 *
@@ -1610,8 +1649,7 @@ static long futex_wait_restart(struct restart_block *restart);
 * 0 - success, lock not taken
 * <0 - on error (-EFAULT)
 */
-static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
-		       int locked)
+static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 {
 	struct task_struct *owner;
 	int ret = 0;
@@ -1622,7 +1660,7 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
 		 * did a lock-steal - fix up the PI-state in that case:
 		 */
 		if (q->pi_state->owner != current)
-			ret = fixup_pi_state_owner(uaddr, q, current, fshared);
+			ret = fixup_pi_state_owner(uaddr, q, current);
 		goto out;
 	}
 
@@ -1644,18 +1682,20 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
 		/*
 		 * pi_state is incorrect, some other task did a lock steal and
 		 * we returned due to timeout or signal without taking the
-		 * rt_mutex. Too late. We can access the rt_mutex_owner without
-		 * locking, as the other task is now blocked on the hash bucket
-		 * lock. Fix the state up.
+		 * rt_mutex. Too late.
 		 */
+		raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
 		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
-		ret = fixup_pi_state_owner(uaddr, q, owner, fshared);
+		if (!owner)
+			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
+		raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
+		ret = fixup_pi_state_owner(uaddr, q, owner);
 		goto out;
 	}
 
 	/*
 	 * Paranoia check. If we did not take the lock, then we should not be
-	 * the owner, nor the pending owner, of the rt_mutex.
+	 * the owner of the rt_mutex.
 	 */
 	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
 		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
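The raw_spin_lock() added above serializes the owner lookup against concurrent rt_mutex operations, so the owner/next-owner pair is read as one consistent snapshot before being handed to fixup_pi_state_owner(). Annotated:

	raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
	owner = rt_mutex_owner(&q->pi_state->pi_mutex);
	if (!owner)	/* nobody owns it at this instant... */
		owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
	raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
	/* owner is now a stable snapshot to fix the pi_state up against */
	ret = fixup_pi_state_owner(uaddr, q, owner);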
@@ -1712,7 +1752,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
 * futex_wait_setup() - Prepare to wait on a futex
 * @uaddr:	the futex userspace address
 * @val:	the expected value
- * @fshared:	whether the futex is shared (1) or not (0)
+ * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @q:		the associated futex_q
 * @hb:		storage for hash_bucket pointer to be returned to caller
 *
@@ -1725,7 +1765,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
 * 0 - uaddr contains val and hb has been locked
 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked
 */
-static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
+static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
 			     struct futex_q *q, struct futex_hash_bucket **hb)
 {
 	u32 uval;
@@ -1740,17 +1780,17 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
 	 *
 	 * The basic logical guarantee of a futex is that it blocks ONLY
 	 * if cond(var) is known to be true at the time of blocking, for
-	 * any cond.  If we queued after testing *uaddr, that would open
-	 * a race condition where we could block indefinitely with
+	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
+	 * would open a race condition where we could block indefinitely with
 	 * cond(var) false, which would violate the guarantee.
 	 *
-	 * A consequence is that futex_wait() can return zero and absorb
-	 * a wakeup when *uaddr != val on entry to the syscall.  This is
-	 * rare, but normal.
+	 * On the other hand, we insert q and release the hash-bucket only
+	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
+	 * absorb a wakeup if *uaddr does not match the desired values
+	 * while the syscall executes.
 	 */
 retry:
-	q->key = FUTEX_KEY_INIT;
-	ret = get_futex_key(uaddr, fshared, &q->key);
+	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
 	if (unlikely(ret != 0))
 		return ret;
 
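The rewritten comment states a two-sided guarantee. Schematically (simplified, not the literal code paths): the waiter tests *uaddr only with the hash-bucket lock held, and a waker changes *uaddr before taking the same lock to wake, so a wakeup cannot fall into the window between the test and the enqueue.

/*
 * Schematic of the ordering argument (simplified):
 *
 *	futex_wait(uaddr, val)		waker
 *	----------------------		-----
 *	lock(&hb->lock)			*uaddr = newval;
 *	if (*uaddr != val)		lock(&hb->lock)
 *		return -EWOULDBLOCK;	wake queued waiters
 *	queue_me(q, hb)  [unlocks]	unlock(&hb->lock)
 *	schedule()
 *
 * Either the waker's store is visible before the waiter's test (the
 * waiter returns -EWOULDBLOCK and absorbs no wakeup), or the waiter is
 * already queued by the time the waker takes hb->lock (and is woken).
 */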
@@ -1766,10 +1806,10 @@ retry_private:
 		if (ret)
 			goto out;
 
-		if (!fshared)
+		if (!(flags & FLAGS_SHARED))
 			goto retry_private;
 
-		put_futex_key(fshared, &q->key);
+		put_futex_key(&q->key);
 		goto retry;
 	}
 
@@ -1780,40 +1820,40 @@ retry_private:
 
 out:
 	if (ret)
-		put_futex_key(fshared, &q->key);
+		put_futex_key(&q->key);
 	return ret;
 }
 
-static int futex_wait(u32 __user *uaddr, int fshared,
-		      u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
+static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
+		      ktime_t *abs_time, u32 bitset)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct restart_block *restart;
 	struct futex_hash_bucket *hb;
-	struct futex_q q;
+	struct futex_q q = futex_q_init;
 	int ret;
 
 	if (!bitset)
 		return -EINVAL;
-
-	q.pi_state = NULL;
 	q.bitset = bitset;
-	q.rt_waiter = NULL;
-	q.requeue_pi_key = NULL;
 
 	if (abs_time) {
 		to = &timeout;
 
-		hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
-				      CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
+				      CLOCK_REALTIME : CLOCK_MONOTONIC,
+				      HRTIMER_MODE_ABS);
 		hrtimer_init_sleeper(to, current);
 		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
 					     current->timer_slack_ns);
 	}
 
retry:
-	/* Prepare to wait on uaddr. */
-	ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
+	/*
+	 * Prepare to wait on uaddr. On success, holds hb lock and increments
+	 * q.key refs.
+	 */
+	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
 	if (ret)
 		goto out;
 
@@ -1822,42 +1862,34 @@ retry:
 
 	/* If we were woken (and unqueued), we succeeded, whatever. */
 	ret = 0;
+	/* unqueue_me() drops q.key ref */
 	if (!unqueue_me(&q))
-		goto out_put_key;
+		goto out;
 	ret = -ETIMEDOUT;
 	if (to && !to->task)
-		goto out_put_key;
+		goto out;
 
 	/*
 	 * We expect signal_pending(current), but we might be the
 	 * victim of a spurious wakeup as well.
 	 */
-	if (!signal_pending(current)) {
-		put_futex_key(fshared, &q.key);
+	if (!signal_pending(current))
 		goto retry;
-	}
 
 	ret = -ERESTARTSYS;
 	if (!abs_time)
-		goto out_put_key;
+		goto out;
 
 	restart = &current_thread_info()->restart_block;
 	restart->fn = futex_wait_restart;
-	restart->futex.uaddr = (u32 *)uaddr;
+	restart->futex.uaddr = uaddr;
 	restart->futex.val = val;
 	restart->futex.time = abs_time->tv64;
 	restart->futex.bitset = bitset;
-	restart->futex.flags = FLAGS_HAS_TIMEOUT;
-
-	if (fshared)
-		restart->futex.flags |= FLAGS_SHARED;
-	if (clockrt)
-		restart->futex.flags |= FLAGS_CLOCKRT;
+	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
 
 	ret = -ERESTART_RESTARTBLOCK;
 
-out_put_key:
-	put_futex_key(fshared, &q.key);
out:
 	if (to) {
 		hrtimer_cancel(&to->timer);
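Because the whole flags word now travels in restart_block, the save side is a plain OR and the restore side hands it back verbatim; compare the removed per-flag reassembly above. Condensed from the two functions:

	/* save, in futex_wait(), before returning -ERESTART_RESTARTBLOCK: */
	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;

	/* restore, in futex_wait_restart(): one word, no re-deriving
	 * fshared/clockrt from individual bits */
	return (long)futex_wait(uaddr, restart->futex.flags,
				restart->futex.val, tp,
				restart->futex.bitset);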
@@ -1869,8 +1901,7 @@ out:
 
 static long futex_wait_restart(struct restart_block *restart)
 {
-	u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
-	int fshared = 0;
+	u32 __user *uaddr = restart->futex.uaddr;
 	ktime_t t, *tp = NULL;
 
 	if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
@@ -1878,11 +1909,9 @@ static long futex_wait_restart(struct restart_block *restart)
 		tp = &t;
 	}
 	restart->fn = do_no_restart_syscall;
-	if (restart->futex.flags & FLAGS_SHARED)
-		fshared = 1;
-	return (long)futex_wait(uaddr, fshared, restart->futex.val, tp,
-				restart->futex.bitset,
-				restart->futex.flags & FLAGS_CLOCKRT);
+
+	return (long)futex_wait(uaddr, restart->futex.flags,
+				restart->futex.val, tp, restart->futex.bitset);
 }
 
 
@@ -1892,12 +1921,12 @@ static long futex_wait_restart(struct restart_block *restart)
 * if there are waiters then it will block, it does PI, etc. (Due to
 * races the kernel might see a 0 value of the futex too.)
 */
-static int futex_lock_pi(u32 __user *uaddr, int fshared,
-			 int detect, ktime_t *time, int trylock)
+static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
+			 ktime_t *time, int trylock)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct futex_hash_bucket *hb;
-	struct futex_q q;
+	struct futex_q q = futex_q_init;
 	int res, ret;
 
 	if (refill_pi_state_cache())
@@ -1911,12 +1940,8 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
 		hrtimer_set_expires(&to->timer, *time);
 	}
 
-	q.pi_state = NULL;
-	q.rt_waiter = NULL;
-	q.requeue_pi_key = NULL;
retry:
-	q.key = FUTEX_KEY_INIT;
-	ret = get_futex_key(uaddr, fshared, &q.key);
+	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key);
 	if (unlikely(ret != 0))
 		goto out;
 
@@ -1938,7 +1963,7 @@ retry_private:
 		 * exit to complete.
 		 */
 		queue_unlock(&q, hb);
-		put_futex_key(fshared, &q.key);
+		put_futex_key(&q.key);
 		cond_resched();
 		goto retry;
 	default:
@@ -1968,7 +1993,7 @@ retry_private:
 	 * Fixup the pi_state owner and possibly acquire the lock if we
 	 * haven't already.
 	 */
-	res = fixup_owner(uaddr, fshared, &q, !ret);
+	res = fixup_owner(uaddr, &q, !ret);
 	/*
 	 * If fixup_owner() returned an error, proprogate that.  If it acquired
 	 * the lock, clear our -ETIMEDOUT or -EINTR.
@@ -1992,7 +2017,7 @@ out_unlock_put_key:
 	queue_unlock(&q, hb);
 
out_put_key:
-	put_futex_key(fshared, &q.key);
+	put_futex_key(&q.key);
out:
 	if (to)
 		destroy_hrtimer_on_stack(&to->timer);
@@ -2005,10 +2030,10 @@ uaddr_faulted:
 	if (ret)
 		goto out_put_key;
 
-	if (!fshared)
+	if (!(flags & FLAGS_SHARED))
 		goto retry_private;
 
-	put_futex_key(fshared, &q.key);
+	put_futex_key(&q.key);
 	goto retry;
 }
 
@@ -2017,13 +2042,13 @@ uaddr_faulted:
 * This is the in-kernel slowpath: we look up the PI state (if any),
 * and do the rt-mutex unlock.
 */
-static int futex_unlock_pi(u32 __user *uaddr, int fshared)
+static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
-	u32 uval;
 	struct plist_head *head;
 	union futex_key key = FUTEX_KEY_INIT;
+	u32 uval, vpid = task_pid_vnr(current);
 	int ret;
 
retry:
@@ -2032,10 +2057,10 @@ retry:
 	/*
 	 * We release only a lock we actually own:
 	 */
-	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
+	if ((uval & FUTEX_TID_MASK) != vpid)
 		return -EPERM;
 
-	ret = get_futex_key(uaddr, fshared, &key);
+	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
 	if (unlikely(ret != 0))
 		goto out;
 
@@ -2047,17 +2072,14 @@ retry:
 	 * again. If it succeeds then we can return without waking
 	 * anyone else up:
 	 */
-	if (!(uval & FUTEX_OWNER_DIED))
-		uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
-
-
-	if (unlikely(uval == -EFAULT))
+	if (!(uval & FUTEX_OWNER_DIED) &&
+	    cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
 		goto pi_faulted;
 	/*
 	 * Rare case: we managed to release the lock atomically,
 	 * no need to wake anyone else up:
 	 */
-	if (unlikely(uval == task_pid_vnr(current)))
+	if (unlikely(uval == vpid))
 		goto out_unlock;
 
 	/*
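Note the short-circuit in the rewritten test: when FUTEX_OWNER_DIED is set no cmpxchg is attempted, and uval keeps the value read earlier, which the 'uval == vpid' fast path below relies on. A long-hand equivalent of the combined condition:

	if (!(uval & FUTEX_OWNER_DIED)) {
		if (cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
			goto pi_faulted;	/* fault while writing */
		/* else uval now holds what the cmpxchg observed */
	}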
@@ -2090,14 +2112,14 @@ retry:
 
out_unlock:
 	spin_unlock(&hb->lock);
-	put_futex_key(fshared, &key);
+	put_futex_key(&key);
 
out:
 	return ret;
 
pi_faulted:
 	spin_unlock(&hb->lock);
-	put_futex_key(fshared, &key);
+	put_futex_key(&key);
 
 	ret = fault_in_user_writeable(uaddr);
 	if (!ret)
@@ -2142,7 +2164,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 		 * We were woken prior to requeue by a timeout or a signal.
 		 * Unqueue the futex_q and determine which it was.
 		 */
-		plist_del(&q->list, &q->list.plist);
+		plist_del(&q->list, &hb->chain);
 
 		/* Handle spurious wakeups gracefully */
 		ret = -EWOULDBLOCK;
@@ -2157,7 +2179,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 /**
 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
 * @uaddr:	the futex we initially wait on (non-pi)
- * @fshared:	whether the futexes are shared (1) or not (0).  They must be
+ * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
 * 		the same type, no requeueing from private to shared, etc.
 * @val:	the expected value of uaddr
 * @abs_time:	absolute timeout
@@ -2195,16 +2217,16 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 * 0 - On success
 * <0 - On error
 */
-static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
+static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 				 u32 val, ktime_t *abs_time, u32 bitset,
-				 int clockrt, u32 __user *uaddr2)
+				 u32 __user *uaddr2)
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct rt_mutex_waiter rt_waiter;
 	struct rt_mutex *pi_mutex = NULL;
 	struct futex_hash_bucket *hb;
-	union futex_key key2;
-	struct futex_q q;
+	union futex_key key2 = FUTEX_KEY_INIT;
+	struct futex_q q = futex_q_init;
 	int res, ret;
 
 	if (!bitset)
@@ -2212,8 +2234,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2212 | 2234 | ||
2213 | if (abs_time) { | 2235 | if (abs_time) { |
2214 | to = &timeout; | 2236 | to = &timeout; |
2215 | hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : | 2237 | hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? |
2216 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 2238 | CLOCK_REALTIME : CLOCK_MONOTONIC, |
2239 | HRTIMER_MODE_ABS); | ||
2217 | hrtimer_init_sleeper(to, current); | 2240 | hrtimer_init_sleeper(to, current); |
2218 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, | 2241 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, |
2219 | current->timer_slack_ns); | 2242 | current->timer_slack_ns); |
@@ -2226,18 +2249,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2226 | debug_rt_mutex_init_waiter(&rt_waiter); | 2249 | debug_rt_mutex_init_waiter(&rt_waiter); |
2227 | rt_waiter.task = NULL; | 2250 | rt_waiter.task = NULL; |
2228 | 2251 | ||
2229 | key2 = FUTEX_KEY_INIT; | 2252 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2); |
2230 | ret = get_futex_key(uaddr2, fshared, &key2); | ||
2231 | if (unlikely(ret != 0)) | 2253 | if (unlikely(ret != 0)) |
2232 | goto out; | 2254 | goto out; |
2233 | 2255 | ||
2234 | q.pi_state = NULL; | ||
2235 | q.bitset = bitset; | 2256 | q.bitset = bitset; |
2236 | q.rt_waiter = &rt_waiter; | 2257 | q.rt_waiter = &rt_waiter; |
2237 | q.requeue_pi_key = &key2; | 2258 | q.requeue_pi_key = &key2; |
2238 | 2259 | ||
2239 | /* Prepare to wait on uaddr. */ | 2260 | /* |
2240 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 2261 | * Prepare to wait on uaddr. On success, increments q.key (key1) ref |
2262 | * count. | ||
2263 | */ | ||
2264 | ret = futex_wait_setup(uaddr, val, flags, &q, &hb); | ||
2241 | if (ret) | 2265 | if (ret) |
2242 | goto out_key2; | 2266 | goto out_key2; |
2243 | 2267 | ||
@@ -2254,7 +2278,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2254 | * In order for us to be here, we know our q.key == key2, and since | 2278 | * In order for us to be here, we know our q.key == key2, and since |
2255 | * we took the hb->lock above, we also know that futex_requeue() has | 2279 | * we took the hb->lock above, we also know that futex_requeue() has |
2256 | * completed and we no longer have to concern ourselves with a wakeup | 2280 | * completed and we no longer have to concern ourselves with a wakeup |
2257 | * race with the atomic proxy lock acquition by the requeue code. | 2281 | * race with the atomic proxy lock acquisition by the requeue code. The |
2282 | * futex_requeue dropped our key1 reference and incremented our key2 | ||
2283 | * reference count. | ||
2258 | */ | 2284 | */ |
2259 | 2285 | ||
2260 | /* Check if the requeue code acquired the second futex for us. */ | 2286 | /* Check if the requeue code acquired the second futex for us. */ |
@@ -2265,8 +2291,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2265 | */ | 2291 | */ |
2266 | if (q.pi_state && (q.pi_state->owner != current)) { | 2292 | if (q.pi_state && (q.pi_state->owner != current)) { |
2267 | spin_lock(q.lock_ptr); | 2293 | spin_lock(q.lock_ptr); |
2268 | ret = fixup_pi_state_owner(uaddr2, &q, current, | 2294 | ret = fixup_pi_state_owner(uaddr2, &q, current); |
2269 | fshared); | ||
2270 | spin_unlock(q.lock_ptr); | 2295 | spin_unlock(q.lock_ptr); |
2271 | } | 2296 | } |
2272 | } else { | 2297 | } else { |
@@ -2285,7 +2310,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2285 | * Fixup the pi_state owner and possibly acquire the lock if we | 2310 | * Fixup the pi_state owner and possibly acquire the lock if we |
2286 | * haven't already. | 2311 | * haven't already. |
2287 | */ | 2312 | */ |
2288 | res = fixup_owner(uaddr2, fshared, &q, !ret); | 2313 | res = fixup_owner(uaddr2, &q, !ret); |
2289 | /* | 2314 | /* |
2290 | * If fixup_owner() returned an error, propagate that. If it | 2315 | ||
2291 | * acquired the lock, clear -ETIMEDOUT or -EINTR. | 2316 | * acquired the lock, clear -ETIMEDOUT or -EINTR. |
@@ -2316,9 +2341,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2316 | } | 2341 | } |
2317 | 2342 | ||
2318 | out_put_keys: | 2343 | out_put_keys: |
2319 | put_futex_key(fshared, &q.key); | 2344 | put_futex_key(&q.key); |
2320 | out_key2: | 2345 | out_key2: |
2321 | put_futex_key(fshared, &key2); | 2346 | put_futex_key(&key2); |
2322 | 2347 | ||
2323 | out: | 2348 | out: |
2324 | if (to) { | 2349 | if (to) { |
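
For orientation, the userspace pairing this function serves is the condvar-over-PI-mutex pattern; a hedged sketch of both sides, where cond, mutex, seq and abs_timeout stand in for glibc-internal state:

        /* Waiter side (e.g. pthread_cond_wait): block on the condvar word;
         * on success the kernel has already requeued this task onto, and
         * possibly acquired, the PI mutex word. */
        syscall(SYS_futex, &cond->futex, FUTEX_WAIT_REQUEUE_PI, seq,
                abs_timeout, &mutex->futex, 0);

        /* Waker side (e.g. pthread_cond_broadcast): wake at most one waiter
         * and requeue the remainder onto the PI mutex, provided the condvar
         * word still holds the expected value seq. Note nr_requeue travels
         * in the timeout argument slot for the requeue ops. */
        syscall(SYS_futex, &cond->futex, FUTEX_CMP_REQUEUE_PI, 1,
                (void *)(long)INT_MAX, &mutex->futex, seq);
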
@@ -2393,10 +2418,19 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, | |||
2393 | goto err_unlock; | 2418 | goto err_unlock; |
2394 | ret = -EPERM; | 2419 | ret = -EPERM; |
2395 | pcred = __task_cred(p); | 2420 | pcred = __task_cred(p); |
2421 | /* If victim is in different user_ns, then uids are not | ||
2422 | comparable, so we must have CAP_SYS_PTRACE */ | ||
2423 | if (cred->user->user_ns != pcred->user->user_ns) { | ||
2424 | if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) | ||
2425 | goto err_unlock; | ||
2426 | goto ok; | ||
2427 | } | ||
2428 | /* If victim is in same user_ns, then uids are comparable */ | ||
2396 | if (cred->euid != pcred->euid && | 2429 | if (cred->euid != pcred->euid && |
2397 | cred->euid != pcred->uid && | 2430 | cred->euid != pcred->uid && |
2398 | !capable(CAP_SYS_PTRACE)) | 2431 | !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) |
2399 | goto err_unlock; | 2432 | goto err_unlock; |
2433 | ok: | ||
2400 | head = p->robust_list; | 2434 | head = p->robust_list; |
2401 | rcu_read_unlock(); | 2435 | rcu_read_unlock(); |
2402 | } | 2436 | } |
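
The uid/CAP_SYS_PTRACE gate above only matters when inspecting another task; a self-query always passes. A minimal sketch of the caller side:

        #include <linux/futex.h>
        #include <stdio.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        int main(void)
        {
                struct robust_list_head *head;
                size_t len;

                /* pid 0 means the calling task; a nonzero pid is what the
                 * user_ns/credential check above gates. */
                if (syscall(SYS_get_robust_list, 0, &head, &len) == 0)
                        printf("robust list head %p, len %zu\n",
                               (void *)head, len);
                return 0;
        }
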
@@ -2435,11 +2469,20 @@ retry: | |||
2435 | * userspace. | 2469 | * userspace. |
2436 | */ | 2470 | */ |
2437 | mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; | 2471 | mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; |
2438 | nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); | 2472 | /* |
2439 | 2473 | * We are not holding a lock here, but we want to have | |
2440 | if (nval == -EFAULT) | 2474 | * the pagefault_disable/enable() protection because |
2441 | return -1; | 2475 | * we want to handle the fault gracefully. If the |
2442 | 2476 | * access fails we try to fault in the futex with R/W | |
2477 | * verification via get_user_pages. get_user() above | ||
2478 | * does not guarantee R/W access. If that fails we | ||
2479 | * give up and leave the futex locked. | ||
2480 | */ | ||
2481 | if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) { | ||
2482 | if (fault_in_user_writeable(uaddr)) | ||
2483 | return -1; | ||
2484 | goto retry; | ||
2485 | } | ||
2443 | if (nval != uval) | 2486 | if (nval != uval) |
2444 | goto retry; | 2487 | goto retry; |
2445 | 2488 | ||
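
What userspace observes after handle_futex_death() marks a lock: the next acquirer of the robust mutex gets EOWNERDEAD and must declare the protected state consistent. A sketch using the POSIX robust-mutex API layered on this mechanism, with lock_robust() an illustrative helper:

        #include <errno.h>
        #include <pthread.h>

        /* Assumes m was initialized with a mutexattr set to
         * PTHREAD_MUTEX_ROBUST; glibc then registers it on the list that
         * exit_robust_list()/handle_futex_death() walk on task exit. */
        static void lock_robust(pthread_mutex_t *m)
        {
                if (pthread_mutex_lock(m) == EOWNERDEAD) {
                        /* The previous owner died holding the lock (the
                         * kernel set FUTEX_OWNER_DIED and woke us); repair
                         * the shared state, then mark the mutex usable. */
                        pthread_mutex_consistent(m);
                }
        }
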
@@ -2458,7 +2501,7 @@ retry: | |||
2458 | */ | 2501 | */ |
2459 | static inline int fetch_robust_entry(struct robust_list __user **entry, | 2502 | static inline int fetch_robust_entry(struct robust_list __user **entry, |
2460 | struct robust_list __user * __user *head, | 2503 | struct robust_list __user * __user *head, |
2461 | int *pi) | 2504 | unsigned int *pi) |
2462 | { | 2505 | { |
2463 | unsigned long uentry; | 2506 | unsigned long uentry; |
2464 | 2507 | ||
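
For reference, the pi flag fetched here travels in bit 0 of the user-space pointer itself; the body of fetch_robust_entry() (unchanged apart from the prototype) decodes it roughly as:

        if (get_user(uentry, (unsigned long __user *)head))
                return -EFAULT;

        *entry = (void __user *)(uentry & ~1UL); /* strip the tag bit */
        *pi = uentry & 1;                        /* PI futex if set */

        return 0;
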
@@ -2481,7 +2524,8 @@ void exit_robust_list(struct task_struct *curr) | |||
2481 | { | 2524 | { |
2482 | struct robust_list_head __user *head = curr->robust_list; | 2525 | struct robust_list_head __user *head = curr->robust_list; |
2483 | struct robust_list __user *entry, *next_entry, *pending; | 2526 | struct robust_list __user *entry, *next_entry, *pending; |
2484 | unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; | 2527 | unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; |
2528 | unsigned int uninitialized_var(next_pi); | ||
2485 | unsigned long futex_offset; | 2529 | unsigned long futex_offset; |
2486 | int rc; | 2530 | int rc; |
2487 | 2531 | ||
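
next_pi is only read on iterations where the preceding fetch_robust_entry() succeeded, so gcc's "may be used uninitialized" warning is a false positive; uninitialized_var() silences it without emitting any code:

        /* As defined in the era's <linux/compiler-gcc.h>: a
         * self-assignment that placates -Wuninitialized. */
        #define uninitialized_var(x) x = x
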
@@ -2542,58 +2586,57 @@ void exit_robust_list(struct task_struct *curr) | |||
2542 | long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | 2586 | long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, |
2543 | u32 __user *uaddr2, u32 val2, u32 val3) | 2587 | u32 __user *uaddr2, u32 val2, u32 val3) |
2544 | { | 2588 | { |
2545 | int clockrt, ret = -ENOSYS; | 2589 | int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK; |
2546 | int cmd = op & FUTEX_CMD_MASK; | 2590 | unsigned int flags = 0; |
2547 | int fshared = 0; | ||
2548 | 2591 | ||
2549 | if (!(op & FUTEX_PRIVATE_FLAG)) | 2592 | if (!(op & FUTEX_PRIVATE_FLAG)) |
2550 | fshared = 1; | 2593 | flags |= FLAGS_SHARED; |
2551 | 2594 | ||
2552 | clockrt = op & FUTEX_CLOCK_REALTIME; | 2595 | if (op & FUTEX_CLOCK_REALTIME) { |
2553 | if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) | 2596 | flags |= FLAGS_CLOCKRT; |
2554 | return -ENOSYS; | 2597 | if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) |
2598 | return -ENOSYS; | ||
2599 | } | ||
2555 | 2600 | ||
2556 | switch (cmd) { | 2601 | switch (cmd) { |
2557 | case FUTEX_WAIT: | 2602 | case FUTEX_WAIT: |
2558 | val3 = FUTEX_BITSET_MATCH_ANY; | 2603 | val3 = FUTEX_BITSET_MATCH_ANY; |
2559 | case FUTEX_WAIT_BITSET: | 2604 | case FUTEX_WAIT_BITSET: |
2560 | ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt); | 2605 | ret = futex_wait(uaddr, flags, val, timeout, val3); |
2561 | break; | 2606 | break; |
2562 | case FUTEX_WAKE: | 2607 | case FUTEX_WAKE: |
2563 | val3 = FUTEX_BITSET_MATCH_ANY; | 2608 | val3 = FUTEX_BITSET_MATCH_ANY; |
2564 | case FUTEX_WAKE_BITSET: | 2609 | case FUTEX_WAKE_BITSET: |
2565 | ret = futex_wake(uaddr, fshared, val, val3); | 2610 | ret = futex_wake(uaddr, flags, val, val3); |
2566 | break; | 2611 | break; |
2567 | case FUTEX_REQUEUE: | 2612 | case FUTEX_REQUEUE: |
2568 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); | 2613 | ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0); |
2569 | break; | 2614 | break; |
2570 | case FUTEX_CMP_REQUEUE: | 2615 | case FUTEX_CMP_REQUEUE: |
2571 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, | 2616 | ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0); |
2572 | 0); | ||
2573 | break; | 2617 | break; |
2574 | case FUTEX_WAKE_OP: | 2618 | case FUTEX_WAKE_OP: |
2575 | ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); | 2619 | ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); |
2576 | break; | 2620 | break; |
2577 | case FUTEX_LOCK_PI: | 2621 | case FUTEX_LOCK_PI: |
2578 | if (futex_cmpxchg_enabled) | 2622 | if (futex_cmpxchg_enabled) |
2579 | ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); | 2623 | ret = futex_lock_pi(uaddr, flags, val, timeout, 0); |
2580 | break; | 2624 | break; |
2581 | case FUTEX_UNLOCK_PI: | 2625 | case FUTEX_UNLOCK_PI: |
2582 | if (futex_cmpxchg_enabled) | 2626 | if (futex_cmpxchg_enabled) |
2583 | ret = futex_unlock_pi(uaddr, fshared); | 2627 | ret = futex_unlock_pi(uaddr, flags); |
2584 | break; | 2628 | break; |
2585 | case FUTEX_TRYLOCK_PI: | 2629 | case FUTEX_TRYLOCK_PI: |
2586 | if (futex_cmpxchg_enabled) | 2630 | if (futex_cmpxchg_enabled) |
2587 | ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); | 2631 | ret = futex_lock_pi(uaddr, flags, 0, timeout, 1); |
2588 | break; | 2632 | break; |
2589 | case FUTEX_WAIT_REQUEUE_PI: | 2633 | case FUTEX_WAIT_REQUEUE_PI: |
2590 | val3 = FUTEX_BITSET_MATCH_ANY; | 2634 | val3 = FUTEX_BITSET_MATCH_ANY; |
2591 | ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, | 2635 | ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, |
2592 | clockrt, uaddr2); | 2636 | uaddr2); |
2593 | break; | 2637 | break; |
2594 | case FUTEX_CMP_REQUEUE_PI: | 2638 | case FUTEX_CMP_REQUEUE_PI: |
2595 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, | 2639 | ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1); |
2596 | 1); | ||
2597 | break; | 2640 | break; |
2598 | default: | 2641 | default: |
2599 | ret = -ENOSYS; | 2642 | ret = -ENOSYS; |
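
The two bits decoded at the top of do_futex() map one-to-one onto the userspace op encoding. A sketch, where word and shared_word are hypothetical futex words (the latter in a MAP_SHARED mapping):

        /* Private op: FUTEX_PRIVATE_FLAG set, so FLAGS_SHARED stays clear
         * and the key is derived from (current->mm, address) only. */
        syscall(SYS_futex, &word, FUTEX_WAKE_PRIVATE, 1, NULL, NULL, 0);

        /* Shared op: no FUTEX_PRIVATE_FLAG, so do_futex() sets FLAGS_SHARED
         * and the futex works across processes via the shared mapping. */
        syscall(SYS_futex, shared_word, FUTEX_WAKE, 1, NULL, NULL, 0);
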
@@ -2647,11 +2690,10 @@ static int __init futex_init(void) | |||
2647 | * of the complex code paths. Also we want to prevent | 2690 | * of the complex code paths. Also we want to prevent |
2648 | * registration of robust lists in that case. NULL is | 2691 | * registration of robust lists in that case. NULL is |
2649 | * guaranteed to fault and we get -EFAULT on functional | 2692 | * guaranteed to fault and we get -EFAULT on functional |
2650 | * implementation, the non functional ones will return | 2693 | * implementation, the non-functional ones will return |
2651 | * -ENOSYS. | 2694 | * -ENOSYS. |
2652 | */ | 2695 | */ |
2653 | curval = cmpxchg_futex_value_locked(NULL, 0, 0); | 2696 | if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) |
2654 | if (curval == -EFAULT) | ||
2655 | futex_cmpxchg_enabled = 1; | 2697 | futex_cmpxchg_enabled = 1; |
2656 | 2698 | ||
2657 | for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { | 2699 | for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { |
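
The probe works because the two failure modes are distinguishable: a functional futex_atomic_cmpxchg_inatomic() takes a fault on the NULL user address and reports -EFAULT, while an unimplemented one returns -ENOSYS, leaving futex_cmpxchg_enabled clear and the PI/robust ops disabled. Sketched, with the stub shown as an assumption about the asm-generic fallback of this era:

        u32 curval;

        /* Functional arch: the user access to NULL faults, the exception
         * fixup returns -EFAULT, and PI/robust ops are enabled. */
        if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
                futex_cmpxchg_enabled = 1;

        /* Non-functional arch stub (an assumption here): unconditionally
         * -ENOSYS, so the probe leaves futex_cmpxchg_enabled clear. */
        static inline int futex_atomic_cmpxchg_inatomic(u32 *uval,
                        u32 __user *uaddr, u32 oldval, u32 newval)
        {
                return -ENOSYS;
        }
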