aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/futex.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/futex.c')
-rw-r--r--kernel/futex.c524
1 files changed, 283 insertions, 241 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 6a3a5fa1526d..fe28dc282eae 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -69,6 +69,14 @@ int __read_mostly futex_cmpxchg_enabled;
69#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) 69#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
70 70
71/* 71/*
72 * Futex flags used to encode options to functions and preserve them across
73 * restarts.
74 */
75#define FLAGS_SHARED 0x01
76#define FLAGS_CLOCKRT 0x02
77#define FLAGS_HAS_TIMEOUT 0x04
78
79/*
72 * Priority Inheritance state: 80 * Priority Inheritance state:
73 */ 81 */
74struct futex_pi_state { 82struct futex_pi_state {
@@ -91,6 +99,7 @@ struct futex_pi_state {
91 99
92/** 100/**
93 * struct futex_q - The hashed futex queue entry, one per waiting task 101 * struct futex_q - The hashed futex queue entry, one per waiting task
102 * @list: priority-sorted list of tasks waiting on this futex
94 * @task: the task waiting on the futex 103 * @task: the task waiting on the futex
95 * @lock_ptr: the hash bucket lock 104 * @lock_ptr: the hash bucket lock
96 * @key: the key the futex is hashed on 105 * @key: the key the futex is hashed on
@@ -104,7 +113,7 @@ struct futex_pi_state {
104 * 113 *
105 * A futex_q has a woken state, just like tasks have TASK_RUNNING. 114 * A futex_q has a woken state, just like tasks have TASK_RUNNING.
106 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0. 115 * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
107 * The order of wakup is always to make the first condition true, then 116 * The order of wakeup is always to make the first condition true, then
108 * the second. 117 * the second.
109 * 118 *
110 * PI futexes are typically woken before they are removed from the hash list via 119 * PI futexes are typically woken before they are removed from the hash list via
@@ -122,6 +131,12 @@ struct futex_q {
122 u32 bitset; 131 u32 bitset;
123}; 132};
124 133
134static const struct futex_q futex_q_init = {
135 /* list gets initialized in queue_me() */
136 .key = FUTEX_KEY_INIT,
137 .bitset = FUTEX_BITSET_MATCH_ANY
138};
139
125/* 140/*
126 * Hash buckets are shared by all the futex_keys that hash to the same 141 * Hash buckets are shared by all the futex_keys that hash to the same
127 * location. Each key may have multiple futex_q structures, one for each task 142 * location. Each key may have multiple futex_q structures, one for each task
@@ -168,7 +183,7 @@ static void get_futex_key_refs(union futex_key *key)
168 183
169 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { 184 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
170 case FUT_OFF_INODE: 185 case FUT_OFF_INODE:
171 atomic_inc(&key->shared.inode->i_count); 186 ihold(key->shared.inode);
172 break; 187 break;
173 case FUT_OFF_MMSHARED: 188 case FUT_OFF_MMSHARED:
174 atomic_inc(&key->private.mm->mm_count); 189 atomic_inc(&key->private.mm->mm_count);
@@ -218,7 +233,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
218{ 233{
219 unsigned long address = (unsigned long)uaddr; 234 unsigned long address = (unsigned long)uaddr;
220 struct mm_struct *mm = current->mm; 235 struct mm_struct *mm = current->mm;
221 struct page *page; 236 struct page *page, *page_head;
222 int err; 237 int err;
223 238
224 /* 239 /*
@@ -250,11 +265,46 @@ again:
250 if (err < 0) 265 if (err < 0)
251 return err; 266 return err;
252 267
253 page = compound_head(page); 268#ifdef CONFIG_TRANSPARENT_HUGEPAGE
254 lock_page(page); 269 page_head = page;
255 if (!page->mapping) { 270 if (unlikely(PageTail(page))) {
256 unlock_page(page);
257 put_page(page); 271 put_page(page);
272 /* serialize against __split_huge_page_splitting() */
273 local_irq_disable();
274 if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
275 page_head = compound_head(page);
276 /*
277 * page_head is a valid pointer but we must pin
278 * it before taking the PG_lock and/or
279 * PG_compound_lock. The moment we re-enable
280 * irqs __split_huge_page_splitting() can
281 * return and the head page can be freed from
282 * under us. We can't take the PG_lock and/or
283 * PG_compound_lock on a page that could be
284 * freed from under us.
285 */
286 if (page != page_head) {
287 get_page(page_head);
288 put_page(page);
289 }
290 local_irq_enable();
291 } else {
292 local_irq_enable();
293 goto again;
294 }
295 }
296#else
297 page_head = compound_head(page);
298 if (page != page_head) {
299 get_page(page_head);
300 put_page(page);
301 }
302#endif
303
304 lock_page(page_head);
305 if (!page_head->mapping) {
306 unlock_page(page_head);
307 put_page(page_head);
258 goto again; 308 goto again;
259 } 309 }
260 310
@@ -265,25 +315,24 @@ again:
265 * it's a read-only handle, it's expected that futexes attach to 315 * it's a read-only handle, it's expected that futexes attach to
266 * the object not the particular process. 316 * the object not the particular process.
267 */ 317 */
268 if (PageAnon(page)) { 318 if (PageAnon(page_head)) {
269 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ 319 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
270 key->private.mm = mm; 320 key->private.mm = mm;
271 key->private.address = address; 321 key->private.address = address;
272 } else { 322 } else {
273 key->both.offset |= FUT_OFF_INODE; /* inode-based key */ 323 key->both.offset |= FUT_OFF_INODE; /* inode-based key */
274 key->shared.inode = page->mapping->host; 324 key->shared.inode = page_head->mapping->host;
275 key->shared.pgoff = page->index; 325 key->shared.pgoff = page_head->index;
276 } 326 }
277 327
278 get_futex_key_refs(key); 328 get_futex_key_refs(key);
279 329
280 unlock_page(page); 330 unlock_page(page_head);
281 put_page(page); 331 put_page(page_head);
282 return 0; 332 return 0;
283} 333}
284 334
285static inline 335static inline void put_futex_key(union futex_key *key)
286void put_futex_key(int fshared, union futex_key *key)
287{ 336{
288 drop_futex_key_refs(key); 337 drop_futex_key_refs(key);
289} 338}
@@ -295,7 +344,7 @@ void put_futex_key(int fshared, union futex_key *key)
295 * Slow path to fixup the fault we just took in the atomic write 344 * Slow path to fixup the fault we just took in the atomic write
296 * access to @uaddr. 345 * access to @uaddr.
297 * 346 *
298 * We have no generic implementation of a non destructive write to the 347 * We have no generic implementation of a non-destructive write to the
299 * user address. We know that we faulted in the atomic pagefault 348 * user address. We know that we faulted in the atomic pagefault
300 * disabled section so we may as well avoid the #PF overhead by 349 * disabled section so we may as well avoid the #PF overhead by
301 * calling get_user_pages() right away. 350 * calling get_user_pages() right away.
@@ -332,15 +381,16 @@ static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
332 return NULL; 381 return NULL;
333} 382}
334 383
335static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) 384static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
385 u32 uval, u32 newval)
336{ 386{
337 u32 curval; 387 int ret;
338 388
339 pagefault_disable(); 389 pagefault_disable();
340 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); 390 ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
341 pagefault_enable(); 391 pagefault_enable();
342 392
343 return curval; 393 return ret;
344} 394}
345 395
346static int get_futex_value_locked(u32 *dest, u32 __user *from) 396static int get_futex_value_locked(u32 *dest, u32 __user *from)
@@ -515,7 +565,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
515 */ 565 */
516 pi_state = this->pi_state; 566 pi_state = this->pi_state;
517 /* 567 /*
518 * Userspace might have messed up non PI and PI futexes 568 * Userspace might have messed up non-PI and PI futexes
519 */ 569 */
520 if (unlikely(!pi_state)) 570 if (unlikely(!pi_state))
521 return -EINVAL; 571 return -EINVAL;
@@ -625,7 +675,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
625 struct task_struct *task, int set_waiters) 675 struct task_struct *task, int set_waiters)
626{ 676{
627 int lock_taken, ret, ownerdied = 0; 677 int lock_taken, ret, ownerdied = 0;
628 u32 uval, newval, curval; 678 u32 uval, newval, curval, vpid = task_pid_vnr(task);
629 679
630retry: 680retry:
631 ret = lock_taken = 0; 681 ret = lock_taken = 0;
@@ -635,19 +685,17 @@ retry:
635 * (by doing a 0 -> TID atomic cmpxchg), while holding all 685 * (by doing a 0 -> TID atomic cmpxchg), while holding all
636 * the locks. It will most likely not succeed. 686 * the locks. It will most likely not succeed.
637 */ 687 */
638 newval = task_pid_vnr(task); 688 newval = vpid;
639 if (set_waiters) 689 if (set_waiters)
640 newval |= FUTEX_WAITERS; 690 newval |= FUTEX_WAITERS;
641 691
642 curval = cmpxchg_futex_value_locked(uaddr, 0, newval); 692 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
643
644 if (unlikely(curval == -EFAULT))
645 return -EFAULT; 693 return -EFAULT;
646 694
647 /* 695 /*
648 * Detect deadlocks. 696 * Detect deadlocks.
649 */ 697 */
650 if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task)))) 698 if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
651 return -EDEADLK; 699 return -EDEADLK;
652 700
653 /* 701 /*
@@ -674,14 +722,12 @@ retry:
674 */ 722 */
675 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { 723 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
676 /* Keep the OWNER_DIED bit */ 724 /* Keep the OWNER_DIED bit */
677 newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task); 725 newval = (curval & ~FUTEX_TID_MASK) | vpid;
678 ownerdied = 0; 726 ownerdied = 0;
679 lock_taken = 1; 727 lock_taken = 1;
680 } 728 }
681 729
682 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 730 if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
683
684 if (unlikely(curval == -EFAULT))
685 return -EFAULT; 731 return -EFAULT;
686 if (unlikely(curval != uval)) 732 if (unlikely(curval != uval))
687 goto retry; 733 goto retry;
@@ -726,6 +772,24 @@ retry:
726 return ret; 772 return ret;
727} 773}
728 774
775/**
776 * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
777 * @q: The futex_q to unqueue
778 *
779 * The q->lock_ptr must not be NULL and must be held by the caller.
780 */
781static void __unqueue_futex(struct futex_q *q)
782{
783 struct futex_hash_bucket *hb;
784
785 if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
786 || WARN_ON(plist_node_empty(&q->list)))
787 return;
788
789 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
790 plist_del(&q->list, &hb->chain);
791}
792
729/* 793/*
730 * The hash bucket lock must be held when this is called. 794 * The hash bucket lock must be held when this is called.
731 * Afterwards, the futex_q must not be accessed. 795 * Afterwards, the futex_q must not be accessed.
@@ -736,14 +800,14 @@ static void wake_futex(struct futex_q *q)
736 800
737 /* 801 /*
738 * We set q->lock_ptr = NULL _before_ we wake up the task. If 802 * We set q->lock_ptr = NULL _before_ we wake up the task. If
739 * a non futex wake up happens on another CPU then the task 803 * a non-futex wake up happens on another CPU then the task
740 * might exit and p would dereference a non existing task 804 * might exit and p would dereference a non-existing task
741 * struct. Prevent this by holding a reference on p across the 805 * struct. Prevent this by holding a reference on p across the
742 * wake up. 806 * wake up.
743 */ 807 */
744 get_task_struct(p); 808 get_task_struct(p);
745 809
746 plist_del(&q->list, &q->list.plist); 810 __unqueue_futex(q);
747 /* 811 /*
748 * The waiting task can free the futex_q as soon as 812 * The waiting task can free the futex_q as soon as
749 * q->lock_ptr = NULL is written, without taking any locks. A 813 * q->lock_ptr = NULL is written, without taking any locks. A
@@ -777,10 +841,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
777 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); 841 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
778 842
779 /* 843 /*
780 * This happens when we have stolen the lock and the original 844 * It is possible that the next waiter (the one that brought
781 * pending owner did not enqueue itself back on the rt_mutex. 845 * this owner to the kernel) timed out and is no longer
782 * Thats not a tragedy. We know that way, that a lock waiter 846 * waiting on the lock.
783 * is on the fly. We make the futex_q waiter the pending owner.
784 */ 847 */
785 if (!new_owner) 848 if (!new_owner)
786 new_owner = this->task; 849 new_owner = this->task;
@@ -795,9 +858,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
795 858
796 newval = FUTEX_WAITERS | task_pid_vnr(new_owner); 859 newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
797 860
798 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 861 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
799
800 if (curval == -EFAULT)
801 ret = -EFAULT; 862 ret = -EFAULT;
802 else if (curval != uval) 863 else if (curval != uval)
803 ret = -EINVAL; 864 ret = -EINVAL;
@@ -832,10 +893,8 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
832 * There is no waiter, so we unlock the futex. The owner died 893 * There is no waiter, so we unlock the futex. The owner died
833 * bit does not have to be preserved here. We are the owner: 894 * bit does not have to be preserved here. We are the owner:
834 */ 895 */
835 oldval = cmpxchg_futex_value_locked(uaddr, uval, 0); 896 if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
836 897 return -EFAULT;
837 if (oldval == -EFAULT)
838 return oldval;
839 if (oldval != uval) 898 if (oldval != uval)
840 return -EAGAIN; 899 return -EAGAIN;
841 900
@@ -869,7 +928,8 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
869/* 928/*
870 * Wake up waiters matching bitset queued on this futex (uaddr). 929 * Wake up waiters matching bitset queued on this futex (uaddr).
871 */ 930 */
872static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) 931static int
932futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
873{ 933{
874 struct futex_hash_bucket *hb; 934 struct futex_hash_bucket *hb;
875 struct futex_q *this, *next; 935 struct futex_q *this, *next;
@@ -880,7 +940,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
880 if (!bitset) 940 if (!bitset)
881 return -EINVAL; 941 return -EINVAL;
882 942
883 ret = get_futex_key(uaddr, fshared, &key); 943 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
884 if (unlikely(ret != 0)) 944 if (unlikely(ret != 0))
885 goto out; 945 goto out;
886 946
@@ -906,7 +966,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
906 } 966 }
907 967
908 spin_unlock(&hb->lock); 968 spin_unlock(&hb->lock);
909 put_futex_key(fshared, &key); 969 put_futex_key(&key);
910out: 970out:
911 return ret; 971 return ret;
912} 972}
@@ -916,7 +976,7 @@ out:
916 * to this virtual address: 976 * to this virtual address:
917 */ 977 */
918static int 978static int
919futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, 979futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
920 int nr_wake, int nr_wake2, int op) 980 int nr_wake, int nr_wake2, int op)
921{ 981{
922 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; 982 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
@@ -926,10 +986,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
926 int ret, op_ret; 986 int ret, op_ret;
927 987
928retry: 988retry:
929 ret = get_futex_key(uaddr1, fshared, &key1); 989 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
930 if (unlikely(ret != 0)) 990 if (unlikely(ret != 0))
931 goto out; 991 goto out;
932 ret = get_futex_key(uaddr2, fshared, &key2); 992 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
933 if (unlikely(ret != 0)) 993 if (unlikely(ret != 0))
934 goto out_put_key1; 994 goto out_put_key1;
935 995
@@ -961,11 +1021,11 @@ retry_private:
961 if (ret) 1021 if (ret)
962 goto out_put_keys; 1022 goto out_put_keys;
963 1023
964 if (!fshared) 1024 if (!(flags & FLAGS_SHARED))
965 goto retry_private; 1025 goto retry_private;
966 1026
967 put_futex_key(fshared, &key2); 1027 put_futex_key(&key2);
968 put_futex_key(fshared, &key1); 1028 put_futex_key(&key1);
969 goto retry; 1029 goto retry;
970 } 1030 }
971 1031
@@ -995,9 +1055,9 @@ retry_private:
995 1055
996 double_unlock_hb(hb1, hb2); 1056 double_unlock_hb(hb1, hb2);
997out_put_keys: 1057out_put_keys:
998 put_futex_key(fshared, &key2); 1058 put_futex_key(&key2);
999out_put_key1: 1059out_put_key1:
1000 put_futex_key(fshared, &key1); 1060 put_futex_key(&key1);
1001out: 1061out:
1002 return ret; 1062 return ret;
1003} 1063}
@@ -1022,9 +1082,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1022 plist_del(&q->list, &hb1->chain); 1082 plist_del(&q->list, &hb1->chain);
1023 plist_add(&q->list, &hb2->chain); 1083 plist_add(&q->list, &hb2->chain);
1024 q->lock_ptr = &hb2->lock; 1084 q->lock_ptr = &hb2->lock;
1025#ifdef CONFIG_DEBUG_PI_LIST
1026 q->list.plist.spinlock = &hb2->lock;
1027#endif
1028 } 1085 }
1029 get_futex_key_refs(key2); 1086 get_futex_key_refs(key2);
1030 q->key = *key2; 1087 q->key = *key2;
@@ -1051,16 +1108,12 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1051 get_futex_key_refs(key); 1108 get_futex_key_refs(key);
1052 q->key = *key; 1109 q->key = *key;
1053 1110
1054 WARN_ON(plist_node_empty(&q->list)); 1111 __unqueue_futex(q);
1055 plist_del(&q->list, &q->list.plist);
1056 1112
1057 WARN_ON(!q->rt_waiter); 1113 WARN_ON(!q->rt_waiter);
1058 q->rt_waiter = NULL; 1114 q->rt_waiter = NULL;
1059 1115
1060 q->lock_ptr = &hb->lock; 1116 q->lock_ptr = &hb->lock;
1061#ifdef CONFIG_DEBUG_PI_LIST
1062 q->list.plist.spinlock = &hb->lock;
1063#endif
1064 1117
1065 wake_up_state(q->task, TASK_NORMAL); 1118 wake_up_state(q->task, TASK_NORMAL);
1066} 1119}
@@ -1131,12 +1184,14 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1131 1184
1132/** 1185/**
1133 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 1186 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
1134 * uaddr1: source futex user address 1187 * @uaddr1: source futex user address
1135 * uaddr2: target futex user address 1188 * @flags: futex flags (FLAGS_SHARED, etc.)
1136 * nr_wake: number of waiters to wake (must be 1 for requeue_pi) 1189 * @uaddr2: target futex user address
1137 * nr_requeue: number of waiters to requeue (0-INT_MAX) 1190 * @nr_wake: number of waiters to wake (must be 1 for requeue_pi)
1138 * requeue_pi: if we are attempting to requeue from a non-pi futex to a 1191 * @nr_requeue: number of waiters to requeue (0-INT_MAX)
1139 * pi futex (pi to pi requeue is not supported) 1192 * @cmpval: @uaddr1 expected value (or %NULL)
1193 * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
1194 * pi futex (pi to pi requeue is not supported)
1140 * 1195 *
1141 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire 1196 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
1142 * uaddr2 atomically on behalf of the top waiter. 1197 * uaddr2 atomically on behalf of the top waiter.
@@ -1145,9 +1200,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1145 * >=0 - on success, the number of tasks requeued or woken 1200 * >=0 - on success, the number of tasks requeued or woken
1146 * <0 - on error 1201 * <0 - on error
1147 */ 1202 */
1148static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, 1203static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
1149 int nr_wake, int nr_requeue, u32 *cmpval, 1204 u32 __user *uaddr2, int nr_wake, int nr_requeue,
1150 int requeue_pi) 1205 u32 *cmpval, int requeue_pi)
1151{ 1206{
1152 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; 1207 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1153 int drop_count = 0, task_count = 0, ret; 1208 int drop_count = 0, task_count = 0, ret;
@@ -1188,10 +1243,10 @@ retry:
1188 pi_state = NULL; 1243 pi_state = NULL;
1189 } 1244 }
1190 1245
1191 ret = get_futex_key(uaddr1, fshared, &key1); 1246 ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1);
1192 if (unlikely(ret != 0)) 1247 if (unlikely(ret != 0))
1193 goto out; 1248 goto out;
1194 ret = get_futex_key(uaddr2, fshared, &key2); 1249 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
1195 if (unlikely(ret != 0)) 1250 if (unlikely(ret != 0))
1196 goto out_put_key1; 1251 goto out_put_key1;
1197 1252
@@ -1213,11 +1268,11 @@ retry_private:
1213 if (ret) 1268 if (ret)
1214 goto out_put_keys; 1269 goto out_put_keys;
1215 1270
1216 if (!fshared) 1271 if (!(flags & FLAGS_SHARED))
1217 goto retry_private; 1272 goto retry_private;
1218 1273
1219 put_futex_key(fshared, &key2); 1274 put_futex_key(&key2);
1220 put_futex_key(fshared, &key1); 1275 put_futex_key(&key1);
1221 goto retry; 1276 goto retry;
1222 } 1277 }
1223 if (curval != *cmpval) { 1278 if (curval != *cmpval) {
@@ -1257,8 +1312,8 @@ retry_private:
1257 break; 1312 break;
1258 case -EFAULT: 1313 case -EFAULT:
1259 double_unlock_hb(hb1, hb2); 1314 double_unlock_hb(hb1, hb2);
1260 put_futex_key(fshared, &key2); 1315 put_futex_key(&key2);
1261 put_futex_key(fshared, &key1); 1316 put_futex_key(&key1);
1262 ret = fault_in_user_writeable(uaddr2); 1317 ret = fault_in_user_writeable(uaddr2);
1263 if (!ret) 1318 if (!ret)
1264 goto retry; 1319 goto retry;
@@ -1266,8 +1321,8 @@ retry_private:
1266 case -EAGAIN: 1321 case -EAGAIN:
1267 /* The owner was exiting, try again. */ 1322 /* The owner was exiting, try again. */
1268 double_unlock_hb(hb1, hb2); 1323 double_unlock_hb(hb1, hb2);
1269 put_futex_key(fshared, &key2); 1324 put_futex_key(&key2);
1270 put_futex_key(fshared, &key1); 1325 put_futex_key(&key1);
1271 cond_resched(); 1326 cond_resched();
1272 goto retry; 1327 goto retry;
1273 default: 1328 default:
@@ -1349,9 +1404,9 @@ out_unlock:
1349 drop_futex_key_refs(&key1); 1404 drop_futex_key_refs(&key1);
1350 1405
1351out_put_keys: 1406out_put_keys:
1352 put_futex_key(fshared, &key2); 1407 put_futex_key(&key2);
1353out_put_key1: 1408out_put_key1:
1354 put_futex_key(fshared, &key1); 1409 put_futex_key(&key1);
1355out: 1410out:
1356 if (pi_state != NULL) 1411 if (pi_state != NULL)
1357 free_pi_state(pi_state); 1412 free_pi_state(pi_state);
@@ -1360,10 +1415,10 @@ out:
1360 1415
1361/* The key must be already stored in q->key. */ 1416/* The key must be already stored in q->key. */
1362static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) 1417static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1418 __acquires(&hb->lock)
1363{ 1419{
1364 struct futex_hash_bucket *hb; 1420 struct futex_hash_bucket *hb;
1365 1421
1366 get_futex_key_refs(&q->key);
1367 hb = hash_futex(&q->key); 1422 hb = hash_futex(&q->key);
1368 q->lock_ptr = &hb->lock; 1423 q->lock_ptr = &hb->lock;
1369 1424
@@ -1373,9 +1428,9 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
1373 1428
1374static inline void 1429static inline void
1375queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb) 1430queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
1431 __releases(&hb->lock)
1376{ 1432{
1377 spin_unlock(&hb->lock); 1433 spin_unlock(&hb->lock);
1378 drop_futex_key_refs(&q->key);
1379} 1434}
1380 1435
1381/** 1436/**
@@ -1391,6 +1446,7 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
1391 * an example). 1446 * an example).
1392 */ 1447 */
1393static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) 1448static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1449 __releases(&hb->lock)
1394{ 1450{
1395 int prio; 1451 int prio;
1396 1452
@@ -1405,9 +1461,6 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
1405 prio = min(current->normal_prio, MAX_RT_PRIO); 1461 prio = min(current->normal_prio, MAX_RT_PRIO);
1406 1462
1407 plist_node_init(&q->list, prio); 1463 plist_node_init(&q->list, prio);
1408#ifdef CONFIG_DEBUG_PI_LIST
1409 q->list.plist.spinlock = &hb->lock;
1410#endif
1411 plist_add(&q->list, &hb->chain); 1464 plist_add(&q->list, &hb->chain);
1412 q->task = current; 1465 q->task = current;
1413 spin_unlock(&hb->lock); 1466 spin_unlock(&hb->lock);
@@ -1452,8 +1505,7 @@ retry:
1452 spin_unlock(lock_ptr); 1505 spin_unlock(lock_ptr);
1453 goto retry; 1506 goto retry;
1454 } 1507 }
1455 WARN_ON(plist_node_empty(&q->list)); 1508 __unqueue_futex(q);
1456 plist_del(&q->list, &q->list.plist);
1457 1509
1458 BUG_ON(q->pi_state); 1510 BUG_ON(q->pi_state);
1459 1511
@@ -1471,17 +1523,15 @@ retry:
1471 * and dropped here. 1523 * and dropped here.
1472 */ 1524 */
1473static void unqueue_me_pi(struct futex_q *q) 1525static void unqueue_me_pi(struct futex_q *q)
1526 __releases(q->lock_ptr)
1474{ 1527{
1475 WARN_ON(plist_node_empty(&q->list)); 1528 __unqueue_futex(q);
1476 plist_del(&q->list, &q->list.plist);
1477 1529
1478 BUG_ON(!q->pi_state); 1530 BUG_ON(!q->pi_state);
1479 free_pi_state(q->pi_state); 1531 free_pi_state(q->pi_state);
1480 q->pi_state = NULL; 1532 q->pi_state = NULL;
1481 1533
1482 spin_unlock(q->lock_ptr); 1534 spin_unlock(q->lock_ptr);
1483
1484 drop_futex_key_refs(&q->key);
1485} 1535}
1486 1536
1487/* 1537/*
@@ -1491,7 +1541,7 @@ static void unqueue_me_pi(struct futex_q *q)
1491 * private futexes. 1541 * private futexes.
1492 */ 1542 */
1493static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, 1543static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1494 struct task_struct *newowner, int fshared) 1544 struct task_struct *newowner)
1495{ 1545{
1496 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; 1546 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
1497 struct futex_pi_state *pi_state = q->pi_state; 1547 struct futex_pi_state *pi_state = q->pi_state;
@@ -1505,10 +1555,10 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1505 1555
1506 /* 1556 /*
1507 * We are here either because we stole the rtmutex from the 1557 * We are here either because we stole the rtmutex from the
1508 * pending owner or we are the pending owner which failed to 1558 * previous highest priority waiter or we are the highest priority
1509 * get the rtmutex. We have to replace the pending owner TID 1559 * waiter but failed to get the rtmutex the first time.
1510 * in the user space variable. This must be atomic as we have 1560 * We have to replace the newowner TID in the user space variable.
1511 * to preserve the owner died bit here. 1561 * This must be atomic as we have to preserve the owner died bit here.
1512 * 1562 *
1513 * Note: We write the user space value _before_ changing the pi_state 1563 * Note: We write the user space value _before_ changing the pi_state
1514 * because we can fault here. Imagine swapped out pages or a fork 1564 * because we can fault here. Imagine swapped out pages or a fork
@@ -1527,9 +1577,7 @@ retry:
1527 while (1) { 1577 while (1) {
1528 newval = (uval & FUTEX_OWNER_DIED) | newtid; 1578 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1529 1579
1530 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 1580 if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
1531
1532 if (curval == -EFAULT)
1533 goto handle_fault; 1581 goto handle_fault;
1534 if (curval == uval) 1582 if (curval == uval)
1535 break; 1583 break;
@@ -1557,8 +1605,8 @@ retry:
1557 1605
1558 /* 1606 /*
1559 * To handle the page fault we need to drop the hash bucket 1607 * To handle the page fault we need to drop the hash bucket
1560 * lock here. That gives the other task (either the pending 1608 * lock here. That gives the other task (either the highest priority
1561 * owner itself or the task which stole the rtmutex) the 1609 * waiter itself or the task which stole the rtmutex) the
1562 * chance to try the fixup of the pi_state. So once we are 1610 * chance to try the fixup of the pi_state. So once we are
1563 * back from handling the fault we need to check the pi_state 1611 * back from handling the fault we need to check the pi_state
1564 * after reacquiring the hash bucket lock and before trying to 1612 * after reacquiring the hash bucket lock and before trying to
@@ -1584,20 +1632,11 @@ handle_fault:
1584 goto retry; 1632 goto retry;
1585} 1633}
1586 1634
1587/*
1588 * In case we must use restart_block to restart a futex_wait,
1589 * we encode in the 'flags' shared capability
1590 */
1591#define FLAGS_SHARED 0x01
1592#define FLAGS_CLOCKRT 0x02
1593#define FLAGS_HAS_TIMEOUT 0x04
1594
1595static long futex_wait_restart(struct restart_block *restart); 1635static long futex_wait_restart(struct restart_block *restart);
1596 1636
1597/** 1637/**
1598 * fixup_owner() - Post lock pi_state and corner case management 1638 * fixup_owner() - Post lock pi_state and corner case management
1599 * @uaddr: user address of the futex 1639 * @uaddr: user address of the futex
1600 * @fshared: whether the futex is shared (1) or not (0)
1601 * @q: futex_q (contains pi_state and access to the rt_mutex) 1640 * @q: futex_q (contains pi_state and access to the rt_mutex)
1602 * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) 1641 * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0)
1603 * 1642 *
@@ -1610,8 +1649,7 @@ static long futex_wait_restart(struct restart_block *restart);
1610 * 0 - success, lock not taken 1649 * 0 - success, lock not taken
1611 * <0 - on error (-EFAULT) 1650 * <0 - on error (-EFAULT)
1612 */ 1651 */
1613static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, 1652static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
1614 int locked)
1615{ 1653{
1616 struct task_struct *owner; 1654 struct task_struct *owner;
1617 int ret = 0; 1655 int ret = 0;
@@ -1622,7 +1660,7 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
1622 * did a lock-steal - fix up the PI-state in that case: 1660 * did a lock-steal - fix up the PI-state in that case:
1623 */ 1661 */
1624 if (q->pi_state->owner != current) 1662 if (q->pi_state->owner != current)
1625 ret = fixup_pi_state_owner(uaddr, q, current, fshared); 1663 ret = fixup_pi_state_owner(uaddr, q, current);
1626 goto out; 1664 goto out;
1627 } 1665 }
1628 1666
@@ -1644,18 +1682,20 @@ static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
1644 /* 1682 /*
1645 * pi_state is incorrect, some other task did a lock steal and 1683 * pi_state is incorrect, some other task did a lock steal and
1646 * we returned due to timeout or signal without taking the 1684 * we returned due to timeout or signal without taking the
1647 * rt_mutex. Too late. We can access the rt_mutex_owner without 1685 * rt_mutex. Too late.
1648 * locking, as the other task is now blocked on the hash bucket
1649 * lock. Fix the state up.
1650 */ 1686 */
1687 raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
1651 owner = rt_mutex_owner(&q->pi_state->pi_mutex); 1688 owner = rt_mutex_owner(&q->pi_state->pi_mutex);
1652 ret = fixup_pi_state_owner(uaddr, q, owner, fshared); 1689 if (!owner)
1690 owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
1691 raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
1692 ret = fixup_pi_state_owner(uaddr, q, owner);
1653 goto out; 1693 goto out;
1654 } 1694 }
1655 1695
1656 /* 1696 /*
1657 * Paranoia check. If we did not take the lock, then we should not be 1697 * Paranoia check. If we did not take the lock, then we should not be
1658 * the owner, nor the pending owner, of the rt_mutex. 1698 * the owner of the rt_mutex.
1659 */ 1699 */
1660 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) 1700 if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
1661 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " 1701 printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
@@ -1712,7 +1752,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1712 * futex_wait_setup() - Prepare to wait on a futex 1752 * futex_wait_setup() - Prepare to wait on a futex
1713 * @uaddr: the futex userspace address 1753 * @uaddr: the futex userspace address
1714 * @val: the expected value 1754 * @val: the expected value
1715 * @fshared: whether the futex is shared (1) or not (0) 1755 * @flags: futex flags (FLAGS_SHARED, etc.)
1716 * @q: the associated futex_q 1756 * @q: the associated futex_q
1717 * @hb: storage for hash_bucket pointer to be returned to caller 1757 * @hb: storage for hash_bucket pointer to be returned to caller
1718 * 1758 *
@@ -1725,7 +1765,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
1725 * 0 - uaddr contains val and hb has been locked 1765 * 0 - uaddr contains val and hb has been locked
1726 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked 1766 * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
1727 */ 1767 */
1728static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, 1768static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
1729 struct futex_q *q, struct futex_hash_bucket **hb) 1769 struct futex_q *q, struct futex_hash_bucket **hb)
1730{ 1770{
1731 u32 uval; 1771 u32 uval;
@@ -1740,17 +1780,17 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
1740 * 1780 *
1741 * The basic logical guarantee of a futex is that it blocks ONLY 1781 * The basic logical guarantee of a futex is that it blocks ONLY
1742 * if cond(var) is known to be true at the time of blocking, for 1782 * if cond(var) is known to be true at the time of blocking, for
1743 * any cond. If we queued after testing *uaddr, that would open 1783 * any cond. If we locked the hash-bucket after testing *uaddr, that
1744 * a race condition where we could block indefinitely with 1784 * would open a race condition where we could block indefinitely with
1745 * cond(var) false, which would violate the guarantee. 1785 * cond(var) false, which would violate the guarantee.
1746 * 1786 *
1747 * A consequence is that futex_wait() can return zero and absorb 1787 * On the other hand, we insert q and release the hash-bucket only
1748 * a wakeup when *uaddr != val on entry to the syscall. This is 1788 * after testing *uaddr. This guarantees that futex_wait() will NOT
1749 * rare, but normal. 1789 * absorb a wakeup if *uaddr does not match the desired values
1790 * while the syscall executes.
1750 */ 1791 */
1751retry: 1792retry:
1752 q->key = FUTEX_KEY_INIT; 1793 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
1753 ret = get_futex_key(uaddr, fshared, &q->key);
1754 if (unlikely(ret != 0)) 1794 if (unlikely(ret != 0))
1755 return ret; 1795 return ret;
1756 1796
@@ -1766,10 +1806,10 @@ retry_private:
1766 if (ret) 1806 if (ret)
1767 goto out; 1807 goto out;
1768 1808
1769 if (!fshared) 1809 if (!(flags & FLAGS_SHARED))
1770 goto retry_private; 1810 goto retry_private;
1771 1811
1772 put_futex_key(fshared, &q->key); 1812 put_futex_key(&q->key);
1773 goto retry; 1813 goto retry;
1774 } 1814 }
1775 1815
@@ -1780,40 +1820,40 @@ retry_private:
1780 1820
1781out: 1821out:
1782 if (ret) 1822 if (ret)
1783 put_futex_key(fshared, &q->key); 1823 put_futex_key(&q->key);
1784 return ret; 1824 return ret;
1785} 1825}
1786 1826
1787static int futex_wait(u32 __user *uaddr, int fshared, 1827static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
1788 u32 val, ktime_t *abs_time, u32 bitset, int clockrt) 1828 ktime_t *abs_time, u32 bitset)
1789{ 1829{
1790 struct hrtimer_sleeper timeout, *to = NULL; 1830 struct hrtimer_sleeper timeout, *to = NULL;
1791 struct restart_block *restart; 1831 struct restart_block *restart;
1792 struct futex_hash_bucket *hb; 1832 struct futex_hash_bucket *hb;
1793 struct futex_q q; 1833 struct futex_q q = futex_q_init;
1794 int ret; 1834 int ret;
1795 1835
1796 if (!bitset) 1836 if (!bitset)
1797 return -EINVAL; 1837 return -EINVAL;
1798
1799 q.pi_state = NULL;
1800 q.bitset = bitset; 1838 q.bitset = bitset;
1801 q.rt_waiter = NULL;
1802 q.requeue_pi_key = NULL;
1803 1839
1804 if (abs_time) { 1840 if (abs_time) {
1805 to = &timeout; 1841 to = &timeout;
1806 1842
1807 hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : 1843 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
1808 CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 1844 CLOCK_REALTIME : CLOCK_MONOTONIC,
1845 HRTIMER_MODE_ABS);
1809 hrtimer_init_sleeper(to, current); 1846 hrtimer_init_sleeper(to, current);
1810 hrtimer_set_expires_range_ns(&to->timer, *abs_time, 1847 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
1811 current->timer_slack_ns); 1848 current->timer_slack_ns);
1812 } 1849 }
1813 1850
1814retry: 1851retry:
1815 /* Prepare to wait on uaddr. */ 1852 /*
1816 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); 1853 * Prepare to wait on uaddr. On success, holds hb lock and increments
1854 * q.key refs.
1855 */
1856 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
1817 if (ret) 1857 if (ret)
1818 goto out; 1858 goto out;
1819 1859
@@ -1822,42 +1862,34 @@ retry:
1822 1862
1823 /* If we were woken (and unqueued), we succeeded, whatever. */ 1863 /* If we were woken (and unqueued), we succeeded, whatever. */
1824 ret = 0; 1864 ret = 0;
1865 /* unqueue_me() drops q.key ref */
1825 if (!unqueue_me(&q)) 1866 if (!unqueue_me(&q))
1826 goto out_put_key; 1867 goto out;
1827 ret = -ETIMEDOUT; 1868 ret = -ETIMEDOUT;
1828 if (to && !to->task) 1869 if (to && !to->task)
1829 goto out_put_key; 1870 goto out;
1830 1871
1831 /* 1872 /*
1832 * We expect signal_pending(current), but we might be the 1873 * We expect signal_pending(current), but we might be the
1833 * victim of a spurious wakeup as well. 1874 * victim of a spurious wakeup as well.
1834 */ 1875 */
1835 if (!signal_pending(current)) { 1876 if (!signal_pending(current))
1836 put_futex_key(fshared, &q.key);
1837 goto retry; 1877 goto retry;
1838 }
1839 1878
1840 ret = -ERESTARTSYS; 1879 ret = -ERESTARTSYS;
1841 if (!abs_time) 1880 if (!abs_time)
1842 goto out_put_key; 1881 goto out;
1843 1882
1844 restart = &current_thread_info()->restart_block; 1883 restart = &current_thread_info()->restart_block;
1845 restart->fn = futex_wait_restart; 1884 restart->fn = futex_wait_restart;
1846 restart->futex.uaddr = (u32 *)uaddr; 1885 restart->futex.uaddr = uaddr;
1847 restart->futex.val = val; 1886 restart->futex.val = val;
1848 restart->futex.time = abs_time->tv64; 1887 restart->futex.time = abs_time->tv64;
1849 restart->futex.bitset = bitset; 1888 restart->futex.bitset = bitset;
1850 restart->futex.flags = FLAGS_HAS_TIMEOUT; 1889 restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
1851
1852 if (fshared)
1853 restart->futex.flags |= FLAGS_SHARED;
1854 if (clockrt)
1855 restart->futex.flags |= FLAGS_CLOCKRT;
1856 1890
1857 ret = -ERESTART_RESTARTBLOCK; 1891 ret = -ERESTART_RESTARTBLOCK;
1858 1892
1859out_put_key:
1860 put_futex_key(fshared, &q.key);
1861out: 1893out:
1862 if (to) { 1894 if (to) {
1863 hrtimer_cancel(&to->timer); 1895 hrtimer_cancel(&to->timer);
@@ -1869,8 +1901,7 @@ out:
1869 1901
1870static long futex_wait_restart(struct restart_block *restart) 1902static long futex_wait_restart(struct restart_block *restart)
1871{ 1903{
1872 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; 1904 u32 __user *uaddr = restart->futex.uaddr;
1873 int fshared = 0;
1874 ktime_t t, *tp = NULL; 1905 ktime_t t, *tp = NULL;
1875 1906
1876 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { 1907 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
@@ -1878,11 +1909,9 @@ static long futex_wait_restart(struct restart_block *restart)
1878 tp = &t; 1909 tp = &t;
1879 } 1910 }
1880 restart->fn = do_no_restart_syscall; 1911 restart->fn = do_no_restart_syscall;
1881 if (restart->futex.flags & FLAGS_SHARED) 1912
1882 fshared = 1; 1913 return (long)futex_wait(uaddr, restart->futex.flags,
1883 return (long)futex_wait(uaddr, fshared, restart->futex.val, tp, 1914 restart->futex.val, tp, restart->futex.bitset);
1884 restart->futex.bitset,
1885 restart->futex.flags & FLAGS_CLOCKRT);
1886} 1915}
1887 1916
1888 1917
@@ -1892,12 +1921,12 @@ static long futex_wait_restart(struct restart_block *restart)
1892 * if there are waiters then it will block, it does PI, etc. (Due to 1921 * if there are waiters then it will block, it does PI, etc. (Due to
1893 * races the kernel might see a 0 value of the futex too.) 1922 * races the kernel might see a 0 value of the futex too.)
1894 */ 1923 */
1895static int futex_lock_pi(u32 __user *uaddr, int fshared, 1924static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, int detect,
1896 int detect, ktime_t *time, int trylock) 1925 ktime_t *time, int trylock)
1897{ 1926{
1898 struct hrtimer_sleeper timeout, *to = NULL; 1927 struct hrtimer_sleeper timeout, *to = NULL;
1899 struct futex_hash_bucket *hb; 1928 struct futex_hash_bucket *hb;
1900 struct futex_q q; 1929 struct futex_q q = futex_q_init;
1901 int res, ret; 1930 int res, ret;
1902 1931
1903 if (refill_pi_state_cache()) 1932 if (refill_pi_state_cache())
@@ -1911,12 +1940,8 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
1911 hrtimer_set_expires(&to->timer, *time); 1940 hrtimer_set_expires(&to->timer, *time);
1912 } 1941 }
1913 1942
1914 q.pi_state = NULL;
1915 q.rt_waiter = NULL;
1916 q.requeue_pi_key = NULL;
1917retry: 1943retry:
1918 q.key = FUTEX_KEY_INIT; 1944 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key);
1919 ret = get_futex_key(uaddr, fshared, &q.key);
1920 if (unlikely(ret != 0)) 1945 if (unlikely(ret != 0))
1921 goto out; 1946 goto out;
1922 1947
@@ -1938,7 +1963,7 @@ retry_private:
1938 * exit to complete. 1963 * exit to complete.
1939 */ 1964 */
1940 queue_unlock(&q, hb); 1965 queue_unlock(&q, hb);
1941 put_futex_key(fshared, &q.key); 1966 put_futex_key(&q.key);
1942 cond_resched(); 1967 cond_resched();
1943 goto retry; 1968 goto retry;
1944 default: 1969 default:
@@ -1968,7 +1993,7 @@ retry_private:
1968 * Fixup the pi_state owner and possibly acquire the lock if we 1993 * Fixup the pi_state owner and possibly acquire the lock if we
1969 * haven't already. 1994 * haven't already.
1970 */ 1995 */
1971 res = fixup_owner(uaddr, fshared, &q, !ret); 1996 res = fixup_owner(uaddr, &q, !ret);
1972 /* 1997 /*
1973 * If fixup_owner() returned an error, propagate that. If it acquired 1998 * If fixup_owner() returned an error, propagate that. If it acquired
1974 * the lock, clear our -ETIMEDOUT or -EINTR. 1999 * the lock, clear our -ETIMEDOUT or -EINTR.
@@ -1992,7 +2017,7 @@ out_unlock_put_key:
1992 queue_unlock(&q, hb); 2017 queue_unlock(&q, hb);
1993 2018
1994out_put_key: 2019out_put_key:
1995 put_futex_key(fshared, &q.key); 2020 put_futex_key(&q.key);
1996out: 2021out:
1997 if (to) 2022 if (to)
1998 destroy_hrtimer_on_stack(&to->timer); 2023 destroy_hrtimer_on_stack(&to->timer);
@@ -2005,10 +2030,10 @@ uaddr_faulted:
2005 if (ret) 2030 if (ret)
2006 goto out_put_key; 2031 goto out_put_key;
2007 2032
2008 if (!fshared) 2033 if (!(flags & FLAGS_SHARED))
2009 goto retry_private; 2034 goto retry_private;
2010 2035
2011 put_futex_key(fshared, &q.key); 2036 put_futex_key(&q.key);
2012 goto retry; 2037 goto retry;
2013} 2038}
2014 2039
@@ -2017,13 +2042,13 @@ uaddr_faulted:
2017 * This is the in-kernel slowpath: we look up the PI state (if any), 2042 * This is the in-kernel slowpath: we look up the PI state (if any),
2018 * and do the rt-mutex unlock. 2043 * and do the rt-mutex unlock.
2019 */ 2044 */
2020static int futex_unlock_pi(u32 __user *uaddr, int fshared) 2045static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
2021{ 2046{
2022 struct futex_hash_bucket *hb; 2047 struct futex_hash_bucket *hb;
2023 struct futex_q *this, *next; 2048 struct futex_q *this, *next;
2024 u32 uval;
2025 struct plist_head *head; 2049 struct plist_head *head;
2026 union futex_key key = FUTEX_KEY_INIT; 2050 union futex_key key = FUTEX_KEY_INIT;
2051 u32 uval, vpid = task_pid_vnr(current);
2027 int ret; 2052 int ret;
2028 2053
2029retry: 2054retry:
@@ -2032,10 +2057,10 @@ retry:
2032 /* 2057 /*
2033 * We release only a lock we actually own: 2058 * We release only a lock we actually own:
2034 */ 2059 */
2035 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) 2060 if ((uval & FUTEX_TID_MASK) != vpid)
2036 return -EPERM; 2061 return -EPERM;
2037 2062
2038 ret = get_futex_key(uaddr, fshared, &key); 2063 ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
2039 if (unlikely(ret != 0)) 2064 if (unlikely(ret != 0))
2040 goto out; 2065 goto out;
2041 2066
@@ -2047,17 +2072,14 @@ retry:
2047 * again. If it succeeds then we can return without waking 2072 * again. If it succeeds then we can return without waking
2048 * anyone else up: 2073 * anyone else up:
2049 */ 2074 */
2050 if (!(uval & FUTEX_OWNER_DIED)) 2075 if (!(uval & FUTEX_OWNER_DIED) &&
2051 uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0); 2076 cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
2052
2053
2054 if (unlikely(uval == -EFAULT))
2055 goto pi_faulted; 2077 goto pi_faulted;
2056 /* 2078 /*
2057 * Rare case: we managed to release the lock atomically, 2079 * Rare case: we managed to release the lock atomically,
2058 * no need to wake anyone else up: 2080 * no need to wake anyone else up:
2059 */ 2081 */
2060 if (unlikely(uval == task_pid_vnr(current))) 2082 if (unlikely(uval == vpid))
2061 goto out_unlock; 2083 goto out_unlock;
2062 2084
2063 /* 2085 /*
@@ -2090,14 +2112,14 @@ retry:
2090 2112
2091out_unlock: 2113out_unlock:
2092 spin_unlock(&hb->lock); 2114 spin_unlock(&hb->lock);
2093 put_futex_key(fshared, &key); 2115 put_futex_key(&key);
2094 2116
2095out: 2117out:
2096 return ret; 2118 return ret;
2097 2119
2098pi_faulted: 2120pi_faulted:
2099 spin_unlock(&hb->lock); 2121 spin_unlock(&hb->lock);
2100 put_futex_key(fshared, &key); 2122 put_futex_key(&key);
2101 2123
2102 ret = fault_in_user_writeable(uaddr); 2124 ret = fault_in_user_writeable(uaddr);
2103 if (!ret) 2125 if (!ret)
@@ -2142,7 +2164,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2142 * We were woken prior to requeue by a timeout or a signal. 2164 * We were woken prior to requeue by a timeout or a signal.
2143 * Unqueue the futex_q and determine which it was. 2165 * Unqueue the futex_q and determine which it was.
2144 */ 2166 */
2145 plist_del(&q->list, &q->list.plist); 2167 plist_del(&q->list, &hb->chain);
2146 2168
2147 /* Handle spurious wakeups gracefully */ 2169 /* Handle spurious wakeups gracefully */
2148 ret = -EWOULDBLOCK; 2170 ret = -EWOULDBLOCK;
@@ -2157,7 +2179,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2157/** 2179/**
2158 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 2180 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
2159 * @uaddr: the futex we initially wait on (non-pi) 2181 * @uaddr: the futex we initially wait on (non-pi)
2160 * @fshared: whether the futexes are shared (1) or not (0). They must be 2182 * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
2161 * the same type, no requeueing from private to shared, etc. 2183 * the same type, no requeueing from private to shared, etc.
2162 * @val: the expected value of uaddr 2184 * @val: the expected value of uaddr
2163 * @abs_time: absolute timeout 2185 * @abs_time: absolute timeout
@@ -2195,16 +2217,16 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2195 * 0 - On success 2217 * 0 - On success
2196 * <0 - On error 2218 * <0 - On error
2197 */ 2219 */
2198static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, 2220static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
2199 u32 val, ktime_t *abs_time, u32 bitset, 2221 u32 val, ktime_t *abs_time, u32 bitset,
2200 int clockrt, u32 __user *uaddr2) 2222 u32 __user *uaddr2)
2201{ 2223{
2202 struct hrtimer_sleeper timeout, *to = NULL; 2224 struct hrtimer_sleeper timeout, *to = NULL;
2203 struct rt_mutex_waiter rt_waiter; 2225 struct rt_mutex_waiter rt_waiter;
2204 struct rt_mutex *pi_mutex = NULL; 2226 struct rt_mutex *pi_mutex = NULL;
2205 struct futex_hash_bucket *hb; 2227 struct futex_hash_bucket *hb;
2206 union futex_key key2; 2228 union futex_key key2 = FUTEX_KEY_INIT;
2207 struct futex_q q; 2229 struct futex_q q = futex_q_init;
2208 int res, ret; 2230 int res, ret;
2209 2231
2210 if (!bitset) 2232 if (!bitset)
@@ -2212,8 +2234,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2212 2234
2213 if (abs_time) { 2235 if (abs_time) {
2214 to = &timeout; 2236 to = &timeout;
2215 hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : 2237 hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
2216 CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 2238 CLOCK_REALTIME : CLOCK_MONOTONIC,
2239 HRTIMER_MODE_ABS);
2217 hrtimer_init_sleeper(to, current); 2240 hrtimer_init_sleeper(to, current);
2218 hrtimer_set_expires_range_ns(&to->timer, *abs_time, 2241 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2219 current->timer_slack_ns); 2242 current->timer_slack_ns);
@@ -2226,18 +2249,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2226 debug_rt_mutex_init_waiter(&rt_waiter); 2249 debug_rt_mutex_init_waiter(&rt_waiter);
2227 rt_waiter.task = NULL; 2250 rt_waiter.task = NULL;
2228 2251
2229 key2 = FUTEX_KEY_INIT; 2252 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2);
2230 ret = get_futex_key(uaddr2, fshared, &key2);
2231 if (unlikely(ret != 0)) 2253 if (unlikely(ret != 0))
2232 goto out; 2254 goto out;
2233 2255
2234 q.pi_state = NULL;
2235 q.bitset = bitset; 2256 q.bitset = bitset;
2236 q.rt_waiter = &rt_waiter; 2257 q.rt_waiter = &rt_waiter;
2237 q.requeue_pi_key = &key2; 2258 q.requeue_pi_key = &key2;
2238 2259
2239 /* Prepare to wait on uaddr. */ 2260 /*
2240 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); 2261 * Prepare to wait on uaddr. On success, increments q.key (key1) ref
2262 * count.
2263 */
2264 ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
2241 if (ret) 2265 if (ret)
2242 goto out_key2; 2266 goto out_key2;
2243 2267
@@ -2254,7 +2278,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2254 * In order for us to be here, we know our q.key == key2, and since 2278 * In order for us to be here, we know our q.key == key2, and since
2255 * we took the hb->lock above, we also know that futex_requeue() has 2279 * we took the hb->lock above, we also know that futex_requeue() has
2256 * completed and we no longer have to concern ourselves with a wakeup 2280 * completed and we no longer have to concern ourselves with a wakeup
2257 * race with the atomic proxy lock acquition by the requeue code. 2281 * race with the atomic proxy lock acquisition by the requeue code. The
2282 * futex_requeue dropped our key1 reference and incremented our key2
2283 * reference count.
2258 */ 2284 */
2259 2285
2260 /* Check if the requeue code acquired the second futex for us. */ 2286 /* Check if the requeue code acquired the second futex for us. */
@@ -2265,8 +2291,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2265 */ 2291 */
2266 if (q.pi_state && (q.pi_state->owner != current)) { 2292 if (q.pi_state && (q.pi_state->owner != current)) {
2267 spin_lock(q.lock_ptr); 2293 spin_lock(q.lock_ptr);
2268 ret = fixup_pi_state_owner(uaddr2, &q, current, 2294 ret = fixup_pi_state_owner(uaddr2, &q, current);
2269 fshared);
2270 spin_unlock(q.lock_ptr); 2295 spin_unlock(q.lock_ptr);
2271 } 2296 }
2272 } else { 2297 } else {
@@ -2285,7 +2310,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2285 * Fixup the pi_state owner and possibly acquire the lock if we 2310 * Fixup the pi_state owner and possibly acquire the lock if we
2286 * haven't already. 2311 * haven't already.
2287 */ 2312 */
2288 res = fixup_owner(uaddr2, fshared, &q, !ret); 2313 res = fixup_owner(uaddr2, &q, !ret);
2289 /* 2314 /*
2290 * If fixup_owner() returned an error, propagate that. If it 2315 * If fixup_owner() returned an error, propagate that. If it
2291 * acquired the lock, clear -ETIMEDOUT or -EINTR. 2316 * acquired the lock, clear -ETIMEDOUT or -EINTR.
@@ -2316,9 +2341,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2316 } 2341 }
2317 2342
2318out_put_keys: 2343out_put_keys:
2319 put_futex_key(fshared, &q.key); 2344 put_futex_key(&q.key);
2320out_key2: 2345out_key2:
2321 put_futex_key(fshared, &key2); 2346 put_futex_key(&key2);
2322 2347
2323out: 2348out:
2324 if (to) { 2349 if (to) {
@@ -2393,10 +2418,19 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
2393 goto err_unlock; 2418 goto err_unlock;
2394 ret = -EPERM; 2419 ret = -EPERM;
2395 pcred = __task_cred(p); 2420 pcred = __task_cred(p);
2421 /* If victim is in different user_ns, then uids are not
2422 comparable, so we must have CAP_SYS_PTRACE */
2423 if (cred->user->user_ns != pcred->user->user_ns) {
2424 if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
2425 goto err_unlock;
2426 goto ok;
2427 }
2428 /* If victim is in same user_ns, then uids are comparable */
2396 if (cred->euid != pcred->euid && 2429 if (cred->euid != pcred->euid &&
2397 cred->euid != pcred->uid && 2430 cred->euid != pcred->uid &&
2398 !capable(CAP_SYS_PTRACE)) 2431 !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
2399 goto err_unlock; 2432 goto err_unlock;
2433ok:
2400 head = p->robust_list; 2434 head = p->robust_list;
2401 rcu_read_unlock(); 2435 rcu_read_unlock();
2402 } 2436 }
@@ -2435,11 +2469,20 @@ retry:
2435 * userspace. 2469 * userspace.
2436 */ 2470 */
2437 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; 2471 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
2438 nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); 2472 /*
2439 2473 * We are not holding a lock here, but we want to have
2440 if (nval == -EFAULT) 2474 * the pagefault_disable/enable() protection because
2441 return -1; 2475 * we want to handle the fault gracefully. If the
2442 2476 * access fails we try to fault in the futex with R/W
2477 * verification via get_user_pages. get_user() above
2478 * does not guarantee R/W access. If that fails we
2479 * give up and leave the futex locked.
2480 */
2481 if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
2482 if (fault_in_user_writeable(uaddr))
2483 return -1;
2484 goto retry;
2485 }
2443 if (nval != uval) 2486 if (nval != uval)
2444 goto retry; 2487 goto retry;
2445 2488
@@ -2458,7 +2501,7 @@ retry:
2458 */ 2501 */
2459static inline int fetch_robust_entry(struct robust_list __user **entry, 2502static inline int fetch_robust_entry(struct robust_list __user **entry,
2460 struct robust_list __user * __user *head, 2503 struct robust_list __user * __user *head,
2461 int *pi) 2504 unsigned int *pi)
2462{ 2505{
2463 unsigned long uentry; 2506 unsigned long uentry;
2464 2507
@@ -2481,7 +2524,8 @@ void exit_robust_list(struct task_struct *curr)
2481{ 2524{
2482 struct robust_list_head __user *head = curr->robust_list; 2525 struct robust_list_head __user *head = curr->robust_list;
2483 struct robust_list __user *entry, *next_entry, *pending; 2526 struct robust_list __user *entry, *next_entry, *pending;
2484 unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; 2527 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
2528 unsigned int uninitialized_var(next_pi);
2485 unsigned long futex_offset; 2529 unsigned long futex_offset;
2486 int rc; 2530 int rc;
2487 2531
@@ -2542,58 +2586,57 @@ void exit_robust_list(struct task_struct *curr)
2542long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, 2586long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2543 u32 __user *uaddr2, u32 val2, u32 val3) 2587 u32 __user *uaddr2, u32 val2, u32 val3)
2544{ 2588{
2545 int clockrt, ret = -ENOSYS; 2589 int ret = -ENOSYS, cmd = op & FUTEX_CMD_MASK;
2546 int cmd = op & FUTEX_CMD_MASK; 2590 unsigned int flags = 0;
2547 int fshared = 0;
2548 2591
2549 if (!(op & FUTEX_PRIVATE_FLAG)) 2592 if (!(op & FUTEX_PRIVATE_FLAG))
2550 fshared = 1; 2593 flags |= FLAGS_SHARED;
2551 2594
2552 clockrt = op & FUTEX_CLOCK_REALTIME; 2595 if (op & FUTEX_CLOCK_REALTIME) {
2553 if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) 2596 flags |= FLAGS_CLOCKRT;
2554 return -ENOSYS; 2597 if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
2598 return -ENOSYS;
2599 }
2555 2600
2556 switch (cmd) { 2601 switch (cmd) {
2557 case FUTEX_WAIT: 2602 case FUTEX_WAIT:
2558 val3 = FUTEX_BITSET_MATCH_ANY; 2603 val3 = FUTEX_BITSET_MATCH_ANY;
2559 case FUTEX_WAIT_BITSET: 2604 case FUTEX_WAIT_BITSET:
2560 ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt); 2605 ret = futex_wait(uaddr, flags, val, timeout, val3);
2561 break; 2606 break;
2562 case FUTEX_WAKE: 2607 case FUTEX_WAKE:
2563 val3 = FUTEX_BITSET_MATCH_ANY; 2608 val3 = FUTEX_BITSET_MATCH_ANY;
2564 case FUTEX_WAKE_BITSET: 2609 case FUTEX_WAKE_BITSET:
2565 ret = futex_wake(uaddr, fshared, val, val3); 2610 ret = futex_wake(uaddr, flags, val, val3);
2566 break; 2611 break;
2567 case FUTEX_REQUEUE: 2612 case FUTEX_REQUEUE:
2568 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); 2613 ret = futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
2569 break; 2614 break;
2570 case FUTEX_CMP_REQUEUE: 2615 case FUTEX_CMP_REQUEUE:
2571 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 2616 ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
2572 0);
2573 break; 2617 break;
2574 case FUTEX_WAKE_OP: 2618 case FUTEX_WAKE_OP:
2575 ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); 2619 ret = futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
2576 break; 2620 break;
2577 case FUTEX_LOCK_PI: 2621 case FUTEX_LOCK_PI:
2578 if (futex_cmpxchg_enabled) 2622 if (futex_cmpxchg_enabled)
2579 ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); 2623 ret = futex_lock_pi(uaddr, flags, val, timeout, 0);
2580 break; 2624 break;
2581 case FUTEX_UNLOCK_PI: 2625 case FUTEX_UNLOCK_PI:
2582 if (futex_cmpxchg_enabled) 2626 if (futex_cmpxchg_enabled)
2583 ret = futex_unlock_pi(uaddr, fshared); 2627 ret = futex_unlock_pi(uaddr, flags);
2584 break; 2628 break;
2585 case FUTEX_TRYLOCK_PI: 2629 case FUTEX_TRYLOCK_PI:
2586 if (futex_cmpxchg_enabled) 2630 if (futex_cmpxchg_enabled)
2587 ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); 2631 ret = futex_lock_pi(uaddr, flags, 0, timeout, 1);
2588 break; 2632 break;
2589 case FUTEX_WAIT_REQUEUE_PI: 2633 case FUTEX_WAIT_REQUEUE_PI:
2590 val3 = FUTEX_BITSET_MATCH_ANY; 2634 val3 = FUTEX_BITSET_MATCH_ANY;
2591 ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, 2635 ret = futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
2592 clockrt, uaddr2); 2636 uaddr2);
2593 break; 2637 break;
2594 case FUTEX_CMP_REQUEUE_PI: 2638 case FUTEX_CMP_REQUEUE_PI:
2595 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 2639 ret = futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
2596 1);
2597 break; 2640 break;
2598 default: 2641 default:
2599 ret = -ENOSYS; 2642 ret = -ENOSYS;
@@ -2647,11 +2690,10 @@ static int __init futex_init(void)
2647 * of the complex code paths. Also we want to prevent 2690 * of the complex code paths. Also we want to prevent
2648 * registration of robust lists in that case. NULL is 2691 * registration of robust lists in that case. NULL is
2649 * guaranteed to fault and we get -EFAULT on functional 2692 * guaranteed to fault and we get -EFAULT on functional
2650 * implementation, the non functional ones will return 2693 * implementation, the non-functional ones will return
2651 * -ENOSYS. 2694 * -ENOSYS.
2652 */ 2695 */
2653 curval = cmpxchg_futex_value_locked(NULL, 0, 0); 2696 if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
2654 if (curval == -EFAULT)
2655 futex_cmpxchg_enabled = 1; 2697 futex_cmpxchg_enabled = 1;
2656 2698
2657 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { 2699 for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {