diff options
-rw-r--r-- | include/linux/futex.h | 29 | ||||
-rw-r--r-- | kernel/futex.c | 324 |
2 files changed, 236 insertions, 117 deletions
diff --git a/include/linux/futex.h b/include/linux/futex.h index 1bd8dfcb037b..899fc7f20edd 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h | |||
@@ -19,6 +19,18 @@ union ktime; | |||
19 | #define FUTEX_TRYLOCK_PI 8 | 19 | #define FUTEX_TRYLOCK_PI 8 |
20 | #define FUTEX_CMP_REQUEUE_PI 9 | 20 | #define FUTEX_CMP_REQUEUE_PI 9 |
21 | 21 | ||
22 | #define FUTEX_PRIVATE_FLAG 128 | ||
23 | #define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG | ||
24 | |||
25 | #define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG) | ||
26 | #define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG) | ||
27 | #define FUTEX_REQUEUE_PRIVATE (FUTEX_REQUEUE | FUTEX_PRIVATE_FLAG) | ||
28 | #define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG) | ||
29 | #define FUTEX_WAKE_OP_PRIVATE (FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG) | ||
30 | #define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG) | ||
31 | #define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG) | ||
32 | #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) | ||
33 | |||
22 | /* | 34 | /* |
23 | * Support for robust futexes: the kernel cleans up held futexes at | 35 | * Support for robust futexes: the kernel cleans up held futexes at |
24 | * thread exit time. | 36 | * thread exit time. |
@@ -114,8 +126,18 @@ handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); | |||
114 | * Don't rearrange members without looking at hash_futex(). | 126 | * Don't rearrange members without looking at hash_futex(). |
115 | * | 127 | * |
116 | * offset is aligned to a multiple of sizeof(u32) (== 4) by definition. | 128 | * offset is aligned to a multiple of sizeof(u32) (== 4) by definition. |
117 | * We set bit 0 to indicate if it's an inode-based key. | 129 | * We use the two low order bits of offset to tell what is the kind of key : |
118 | */ | 130 | * 00 : Private process futex (PTHREAD_PROCESS_PRIVATE) |
131 | * (no reference on an inode or mm) | ||
132 | * 01 : Shared futex (PTHREAD_PROCESS_SHARED) | ||
133 | * mapped on a file (reference on the underlying inode) | ||
134 | * 10 : Shared futex (PTHREAD_PROCESS_SHARED) | ||
135 | * (but private mapping on an mm, and reference taken on it) | ||
136 | */ | ||
137 | |||
138 | #define FUT_OFF_INODE 1 /* We set bit 0 if key has a reference on inode */ | ||
139 | #define FUT_OFF_MMSHARED 2 /* We set bit 1 if key has a reference on mm */ | ||
140 | |||
119 | union futex_key { | 141 | union futex_key { |
120 | u32 __user *uaddr; | 142 | u32 __user *uaddr; |
121 | struct { | 143 | struct { |
@@ -134,7 +156,8 @@ union futex_key { | |||
134 | int offset; | 156 | int offset; |
135 | } both; | 157 | } both; |
136 | }; | 158 | }; |
137 | int get_futex_key(u32 __user *uaddr, union futex_key *key); | 159 | int get_futex_key(u32 __user *uaddr, struct rw_semaphore *shared, |
160 | union futex_key *key); | ||
138 | void get_futex_key_refs(union futex_key *key); | 161 | void get_futex_key_refs(union futex_key *key); |
139 | void drop_futex_key_refs(union futex_key *key); | 162 | void drop_futex_key_refs(union futex_key *key); |
140 | 163 | ||
diff --git a/kernel/futex.c b/kernel/futex.c index 4a60ef55dab4..b7ce15c67e32 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -16,6 +16,9 @@ | |||
16 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | 16 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> |
17 | * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | 17 | * Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> |
18 | * | 18 | * |
19 | * PRIVATE futexes by Eric Dumazet | ||
20 | * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com> | ||
21 | * | ||
19 | * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly | 22 | * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly |
20 | * enough at me, Linus for the original (flawed) idea, Matthew | 23 | * enough at me, Linus for the original (flawed) idea, Matthew |
21 | * Kirkwood for proof-of-concept implementation. | 24 | * Kirkwood for proof-of-concept implementation. |
@@ -150,19 +153,26 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2) | |||
150 | && key1->both.offset == key2->both.offset); | 153 | && key1->both.offset == key2->both.offset); |
151 | } | 154 | } |
152 | 155 | ||
153 | /* | 156 | /** |
154 | * Get parameters which are the keys for a futex. | 157 | * get_futex_key - Get parameters which are the keys for a futex. |
158 | * @uaddr: virtual address of the futex | ||
159 | * @shared: NULL for a PROCESS_PRIVATE futex, | ||
160 | * &current->mm->mmap_sem for a PROCESS_SHARED futex | ||
161 | * @key: address where result is stored. | ||
162 | * | ||
163 | * Returns a negative error code or 0 | ||
164 | * The key words are stored in *key on success. | ||
155 | * | 165 | * |
156 | * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode, | 166 | * For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode, |
157 | * offset_within_page). For private mappings, it's (uaddr, current->mm). | 167 | * offset_within_page). For private mappings, it's (uaddr, current->mm). |
158 | * We can usually work out the index without swapping in the page. | 168 | * We can usually work out the index without swapping in the page. |
159 | * | 169 | * |
160 | * Returns: 0, or negative error code. | 170 | * fshared is NULL for PROCESS_PRIVATE futexes |
161 | * The key words are stored in *key on success. | 171 | * For other futexes, it points to &current->mm->mmap_sem and |
162 | * | 172 | * caller must have taken the reader lock. but NOT any spinlocks. |
163 | * Should be called with &current->mm->mmap_sem but NOT any spinlocks. | ||
164 | */ | 173 | */ |
165 | int get_futex_key(u32 __user *uaddr, union futex_key *key) | 174 | int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, |
175 | union futex_key *key) | ||
166 | { | 176 | { |
167 | unsigned long address = (unsigned long)uaddr; | 177 | unsigned long address = (unsigned long)uaddr; |
168 | struct mm_struct *mm = current->mm; | 178 | struct mm_struct *mm = current->mm; |
@@ -174,11 +184,25 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key) | |||
174 | * The futex address must be "naturally" aligned. | 184 | * The futex address must be "naturally" aligned. |
175 | */ | 185 | */ |
176 | key->both.offset = address % PAGE_SIZE; | 186 | key->both.offset = address % PAGE_SIZE; |
177 | if (unlikely((key->both.offset % sizeof(u32)) != 0)) | 187 | if (unlikely((address % sizeof(u32)) != 0)) |
178 | return -EINVAL; | 188 | return -EINVAL; |
179 | address -= key->both.offset; | 189 | address -= key->both.offset; |
180 | 190 | ||
181 | /* | 191 | /* |
192 | * PROCESS_PRIVATE futexes are fast. | ||
193 | * As the mm cannot disappear under us and the 'key' only needs | ||
194 | * virtual address, we dont even have to find the underlying vma. | ||
195 | * Note : We do have to check 'uaddr' is a valid user address, | ||
196 | * but access_ok() should be faster than find_vma() | ||
197 | */ | ||
198 | if (!fshared) { | ||
199 | if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))) | ||
200 | return -EFAULT; | ||
201 | key->private.mm = mm; | ||
202 | key->private.address = address; | ||
203 | return 0; | ||
204 | } | ||
205 | /* | ||
182 | * The futex is hashed differently depending on whether | 206 | * The futex is hashed differently depending on whether |
183 | * it's in a shared or private mapping. So check vma first. | 207 | * it's in a shared or private mapping. So check vma first. |
184 | */ | 208 | */ |
@@ -205,6 +229,7 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key) | |||
205 | * mappings of _writable_ handles. | 229 | * mappings of _writable_ handles. |
206 | */ | 230 | */ |
207 | if (likely(!(vma->vm_flags & VM_MAYSHARE))) { | 231 | if (likely(!(vma->vm_flags & VM_MAYSHARE))) { |
232 | key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */ | ||
208 | key->private.mm = mm; | 233 | key->private.mm = mm; |
209 | key->private.address = address; | 234 | key->private.address = address; |
210 | return 0; | 235 | return 0; |
@@ -214,7 +239,7 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key) | |||
214 | * Linear file mappings are also simple. | 239 | * Linear file mappings are also simple. |
215 | */ | 240 | */ |
216 | key->shared.inode = vma->vm_file->f_path.dentry->d_inode; | 241 | key->shared.inode = vma->vm_file->f_path.dentry->d_inode; |
217 | key->both.offset++; /* Bit 0 of offset indicates inode-based key. */ | 242 | key->both.offset |= FUT_OFF_INODE; /* inode-based key. */ |
218 | if (likely(!(vma->vm_flags & VM_NONLINEAR))) { | 243 | if (likely(!(vma->vm_flags & VM_NONLINEAR))) { |
219 | key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) | 244 | key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) |
220 | + vma->vm_pgoff); | 245 | + vma->vm_pgoff); |
@@ -242,16 +267,18 @@ EXPORT_SYMBOL_GPL(get_futex_key); | |||
242 | * Take a reference to the resource addressed by a key. | 267 | * Take a reference to the resource addressed by a key. |
243 | * Can be called while holding spinlocks. | 268 | * Can be called while holding spinlocks. |
244 | * | 269 | * |
245 | * NOTE: mmap_sem MUST be held between get_futex_key() and calling this | ||
246 | * function, if it is called at all. mmap_sem keeps key->shared.inode valid. | ||
247 | */ | 270 | */ |
248 | inline void get_futex_key_refs(union futex_key *key) | 271 | inline void get_futex_key_refs(union futex_key *key) |
249 | { | 272 | { |
250 | if (key->both.ptr != 0) { | 273 | if (key->both.ptr == 0) |
251 | if (key->both.offset & 1) | 274 | return; |
275 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | ||
276 | case FUT_OFF_INODE: | ||
252 | atomic_inc(&key->shared.inode->i_count); | 277 | atomic_inc(&key->shared.inode->i_count); |
253 | else | 278 | break; |
279 | case FUT_OFF_MMSHARED: | ||
254 | atomic_inc(&key->private.mm->mm_count); | 280 | atomic_inc(&key->private.mm->mm_count); |
281 | break; | ||
255 | } | 282 | } |
256 | } | 283 | } |
257 | EXPORT_SYMBOL_GPL(get_futex_key_refs); | 284 | EXPORT_SYMBOL_GPL(get_futex_key_refs); |
@@ -262,11 +289,15 @@ EXPORT_SYMBOL_GPL(get_futex_key_refs); | |||
262 | */ | 289 | */ |
263 | void drop_futex_key_refs(union futex_key *key) | 290 | void drop_futex_key_refs(union futex_key *key) |
264 | { | 291 | { |
265 | if (key->both.ptr != 0) { | 292 | if (key->both.ptr == 0) |
266 | if (key->both.offset & 1) | 293 | return; |
294 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | ||
295 | case FUT_OFF_INODE: | ||
267 | iput(key->shared.inode); | 296 | iput(key->shared.inode); |
268 | else | 297 | break; |
298 | case FUT_OFF_MMSHARED: | ||
269 | mmdrop(key->private.mm); | 299 | mmdrop(key->private.mm); |
300 | break; | ||
270 | } | 301 | } |
271 | } | 302 | } |
272 | EXPORT_SYMBOL_GPL(drop_futex_key_refs); | 303 | EXPORT_SYMBOL_GPL(drop_futex_key_refs); |
@@ -283,28 +314,38 @@ static inline int get_futex_value_locked(u32 *dest, u32 __user *from) | |||
283 | } | 314 | } |
284 | 315 | ||
285 | /* | 316 | /* |
286 | * Fault handling. Called with current->mm->mmap_sem held. | 317 | * Fault handling. |
318 | * if fshared is non NULL, current->mm->mmap_sem is already held | ||
287 | */ | 319 | */ |
288 | static int futex_handle_fault(unsigned long address, int attempt) | 320 | static int futex_handle_fault(unsigned long address, |
321 | struct rw_semaphore *fshared, int attempt) | ||
289 | { | 322 | { |
290 | struct vm_area_struct * vma; | 323 | struct vm_area_struct * vma; |
291 | struct mm_struct *mm = current->mm; | 324 | struct mm_struct *mm = current->mm; |
325 | int ret = -EFAULT; | ||
292 | 326 | ||
293 | if (attempt > 2 || !(vma = find_vma(mm, address)) || | 327 | if (attempt > 2) |
294 | vma->vm_start > address || !(vma->vm_flags & VM_WRITE)) | 328 | return ret; |
295 | return -EFAULT; | ||
296 | 329 | ||
297 | switch (handle_mm_fault(mm, vma, address, 1)) { | 330 | if (!fshared) |
298 | case VM_FAULT_MINOR: | 331 | down_read(&mm->mmap_sem); |
299 | current->min_flt++; | 332 | vma = find_vma(mm, address); |
300 | break; | 333 | if (vma && address >= vma->vm_start && |
301 | case VM_FAULT_MAJOR: | 334 | (vma->vm_flags & VM_WRITE)) { |
302 | current->maj_flt++; | 335 | switch (handle_mm_fault(mm, vma, address, 1)) { |
303 | break; | 336 | case VM_FAULT_MINOR: |
304 | default: | 337 | ret = 0; |
305 | return -EFAULT; | 338 | current->min_flt++; |
339 | break; | ||
340 | case VM_FAULT_MAJOR: | ||
341 | ret = 0; | ||
342 | current->maj_flt++; | ||
343 | break; | ||
344 | } | ||
306 | } | 345 | } |
307 | return 0; | 346 | if (!fshared) |
347 | up_read(&mm->mmap_sem); | ||
348 | return ret; | ||
308 | } | 349 | } |
309 | 350 | ||
310 | /* | 351 | /* |
@@ -647,7 +688,8 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) | |||
647 | * Wake up all waiters hashed on the physical page that is mapped | 688 | * Wake up all waiters hashed on the physical page that is mapped |
648 | * to this virtual address: | 689 | * to this virtual address: |
649 | */ | 690 | */ |
650 | static int futex_wake(u32 __user *uaddr, int nr_wake) | 691 | static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, |
692 | int nr_wake) | ||
651 | { | 693 | { |
652 | struct futex_hash_bucket *hb; | 694 | struct futex_hash_bucket *hb; |
653 | struct futex_q *this, *next; | 695 | struct futex_q *this, *next; |
@@ -655,9 +697,10 @@ static int futex_wake(u32 __user *uaddr, int nr_wake) | |||
655 | union futex_key key; | 697 | union futex_key key; |
656 | int ret; | 698 | int ret; |
657 | 699 | ||
658 | down_read(&current->mm->mmap_sem); | 700 | if (fshared) |
701 | down_read(fshared); | ||
659 | 702 | ||
660 | ret = get_futex_key(uaddr, &key); | 703 | ret = get_futex_key(uaddr, fshared, &key); |
661 | if (unlikely(ret != 0)) | 704 | if (unlikely(ret != 0)) |
662 | goto out; | 705 | goto out; |
663 | 706 | ||
@@ -679,7 +722,8 @@ static int futex_wake(u32 __user *uaddr, int nr_wake) | |||
679 | 722 | ||
680 | spin_unlock(&hb->lock); | 723 | spin_unlock(&hb->lock); |
681 | out: | 724 | out: |
682 | up_read(&current->mm->mmap_sem); | 725 | if (fshared) |
726 | up_read(fshared); | ||
683 | return ret; | 727 | return ret; |
684 | } | 728 | } |
685 | 729 | ||
@@ -746,7 +790,9 @@ retry: | |||
746 | * and requeue the next nr_requeue waiters following hashed on | 790 | * and requeue the next nr_requeue waiters following hashed on |
747 | * one physical page to another physical page (PI-futex uaddr2) | 791 | * one physical page to another physical page (PI-futex uaddr2) |
748 | */ | 792 | */ |
749 | static int futex_requeue_pi(u32 __user *uaddr1, u32 __user *uaddr2, | 793 | static int futex_requeue_pi(u32 __user *uaddr1, |
794 | struct rw_semaphore *fshared, | ||
795 | u32 __user *uaddr2, | ||
750 | int nr_wake, int nr_requeue, u32 *cmpval) | 796 | int nr_wake, int nr_requeue, u32 *cmpval) |
751 | { | 797 | { |
752 | union futex_key key1, key2; | 798 | union futex_key key1, key2; |
@@ -765,12 +811,13 @@ retry: | |||
765 | /* | 811 | /* |
766 | * First take all the futex related locks: | 812 | * First take all the futex related locks: |
767 | */ | 813 | */ |
768 | down_read(&current->mm->mmap_sem); | 814 | if (fshared) |
815 | down_read(fshared); | ||
769 | 816 | ||
770 | ret = get_futex_key(uaddr1, &key1); | 817 | ret = get_futex_key(uaddr1, fshared, &key1); |
771 | if (unlikely(ret != 0)) | 818 | if (unlikely(ret != 0)) |
772 | goto out; | 819 | goto out; |
773 | ret = get_futex_key(uaddr2, &key2); | 820 | ret = get_futex_key(uaddr2, fshared, &key2); |
774 | if (unlikely(ret != 0)) | 821 | if (unlikely(ret != 0)) |
775 | goto out; | 822 | goto out; |
776 | 823 | ||
@@ -793,7 +840,8 @@ retry: | |||
793 | * If we would have faulted, release mmap_sem, fault | 840 | * If we would have faulted, release mmap_sem, fault |
794 | * it in and start all over again. | 841 | * it in and start all over again. |
795 | */ | 842 | */ |
796 | up_read(&current->mm->mmap_sem); | 843 | if (fshared) |
844 | up_read(fshared); | ||
797 | 845 | ||
798 | ret = get_user(curval, uaddr1); | 846 | ret = get_user(curval, uaddr1); |
799 | 847 | ||
@@ -927,7 +975,8 @@ out_unlock: | |||
927 | drop_futex_key_refs(&key1); | 975 | drop_futex_key_refs(&key1); |
928 | 976 | ||
929 | out: | 977 | out: |
930 | up_read(&current->mm->mmap_sem); | 978 | if (fshared) |
979 | up_read(fshared); | ||
931 | return ret; | 980 | return ret; |
932 | } | 981 | } |
933 | 982 | ||
@@ -936,7 +985,8 @@ out: | |||
936 | * to this virtual address: | 985 | * to this virtual address: |
937 | */ | 986 | */ |
938 | static int | 987 | static int |
939 | futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2, | 988 | futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, |
989 | u32 __user *uaddr2, | ||
940 | int nr_wake, int nr_wake2, int op) | 990 | int nr_wake, int nr_wake2, int op) |
941 | { | 991 | { |
942 | union futex_key key1, key2; | 992 | union futex_key key1, key2; |
@@ -946,12 +996,13 @@ futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2, | |||
946 | int ret, op_ret, attempt = 0; | 996 | int ret, op_ret, attempt = 0; |
947 | 997 | ||
948 | retryfull: | 998 | retryfull: |
949 | down_read(&current->mm->mmap_sem); | 999 | if (fshared) |
1000 | down_read(fshared); | ||
950 | 1001 | ||
951 | ret = get_futex_key(uaddr1, &key1); | 1002 | ret = get_futex_key(uaddr1, fshared, &key1); |
952 | if (unlikely(ret != 0)) | 1003 | if (unlikely(ret != 0)) |
953 | goto out; | 1004 | goto out; |
954 | ret = get_futex_key(uaddr2, &key2); | 1005 | ret = get_futex_key(uaddr2, fshared, &key2); |
955 | if (unlikely(ret != 0)) | 1006 | if (unlikely(ret != 0)) |
956 | goto out; | 1007 | goto out; |
957 | 1008 | ||
@@ -991,11 +1042,10 @@ retry: | |||
991 | * still holding the mmap_sem. | 1042 | * still holding the mmap_sem. |
992 | */ | 1043 | */ |
993 | if (attempt++) { | 1044 | if (attempt++) { |
994 | if (futex_handle_fault((unsigned long)uaddr2, | 1045 | ret = futex_handle_fault((unsigned long)uaddr2, |
995 | attempt)) { | 1046 | fshared, attempt); |
996 | ret = -EFAULT; | 1047 | if (ret) |
997 | goto out; | 1048 | goto out; |
998 | } | ||
999 | goto retry; | 1049 | goto retry; |
1000 | } | 1050 | } |
1001 | 1051 | ||
@@ -1003,7 +1053,8 @@ retry: | |||
1003 | * If we would have faulted, release mmap_sem, | 1053 | * If we would have faulted, release mmap_sem, |
1004 | * fault it in and start all over again. | 1054 | * fault it in and start all over again. |
1005 | */ | 1055 | */ |
1006 | up_read(&current->mm->mmap_sem); | 1056 | if (fshared) |
1057 | up_read(fshared); | ||
1007 | 1058 | ||
1008 | ret = get_user(dummy, uaddr2); | 1059 | ret = get_user(dummy, uaddr2); |
1009 | if (ret) | 1060 | if (ret) |
@@ -1040,7 +1091,8 @@ retry: | |||
1040 | if (hb1 != hb2) | 1091 | if (hb1 != hb2) |
1041 | spin_unlock(&hb2->lock); | 1092 | spin_unlock(&hb2->lock); |
1042 | out: | 1093 | out: |
1043 | up_read(&current->mm->mmap_sem); | 1094 | if (fshared) |
1095 | up_read(fshared); | ||
1044 | return ret; | 1096 | return ret; |
1045 | } | 1097 | } |
1046 | 1098 | ||
@@ -1048,7 +1100,8 @@ out: | |||
1048 | * Requeue all waiters hashed on one physical page to another | 1100 | * Requeue all waiters hashed on one physical page to another |
1049 | * physical page. | 1101 | * physical page. |
1050 | */ | 1102 | */ |
1051 | static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, | 1103 | static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, |
1104 | u32 __user *uaddr2, | ||
1052 | int nr_wake, int nr_requeue, u32 *cmpval) | 1105 | int nr_wake, int nr_requeue, u32 *cmpval) |
1053 | { | 1106 | { |
1054 | union futex_key key1, key2; | 1107 | union futex_key key1, key2; |
@@ -1058,12 +1111,13 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, | |||
1058 | int ret, drop_count = 0; | 1111 | int ret, drop_count = 0; |
1059 | 1112 | ||
1060 | retry: | 1113 | retry: |
1061 | down_read(&current->mm->mmap_sem); | 1114 | if (fshared) |
1115 | down_read(fshared); | ||
1062 | 1116 | ||
1063 | ret = get_futex_key(uaddr1, &key1); | 1117 | ret = get_futex_key(uaddr1, fshared, &key1); |
1064 | if (unlikely(ret != 0)) | 1118 | if (unlikely(ret != 0)) |
1065 | goto out; | 1119 | goto out; |
1066 | ret = get_futex_key(uaddr2, &key2); | 1120 | ret = get_futex_key(uaddr2, fshared, &key2); |
1067 | if (unlikely(ret != 0)) | 1121 | if (unlikely(ret != 0)) |
1068 | goto out; | 1122 | goto out; |
1069 | 1123 | ||
@@ -1086,7 +1140,8 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2, | |||
1086 | * If we would have faulted, release mmap_sem, fault | 1140 | * If we would have faulted, release mmap_sem, fault |
1087 | * it in and start all over again. | 1141 | * it in and start all over again. |
1088 | */ | 1142 | */ |
1089 | up_read(&current->mm->mmap_sem); | 1143 | if (fshared) |
1144 | up_read(fshared); | ||
1090 | 1145 | ||
1091 | ret = get_user(curval, uaddr1); | 1146 | ret = get_user(curval, uaddr1); |
1092 | 1147 | ||
@@ -1139,7 +1194,8 @@ out_unlock: | |||
1139 | drop_futex_key_refs(&key1); | 1194 | drop_futex_key_refs(&key1); |
1140 | 1195 | ||
1141 | out: | 1196 | out: |
1142 | up_read(&current->mm->mmap_sem); | 1197 | if (fshared) |
1198 | up_read(fshared); | ||
1143 | return ret; | 1199 | return ret; |
1144 | } | 1200 | } |
1145 | 1201 | ||
@@ -1273,7 +1329,8 @@ static void unqueue_me_pi(struct futex_q *q) | |||
1273 | * The cur->mm semaphore must be held, it is released at return of this | 1329 | * The cur->mm semaphore must be held, it is released at return of this |
1274 | * function. | 1330 | * function. |
1275 | */ | 1331 | */ |
1276 | static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | 1332 | static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared, |
1333 | struct futex_q *q, | ||
1277 | struct futex_hash_bucket *hb, | 1334 | struct futex_hash_bucket *hb, |
1278 | struct task_struct *curr) | 1335 | struct task_struct *curr) |
1279 | { | 1336 | { |
@@ -1300,7 +1357,8 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |||
1300 | 1357 | ||
1301 | /* Unqueue and drop the lock */ | 1358 | /* Unqueue and drop the lock */ |
1302 | unqueue_me_pi(q); | 1359 | unqueue_me_pi(q); |
1303 | up_read(&curr->mm->mmap_sem); | 1360 | if (fshared) |
1361 | up_read(fshared); | ||
1304 | /* | 1362 | /* |
1305 | * We own it, so we have to replace the pending owner | 1363 | * We own it, so we have to replace the pending owner |
1306 | * TID. This must be atomic as we have preserve the | 1364 | * TID. This must be atomic as we have preserve the |
@@ -1321,8 +1379,15 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |||
1321 | return ret; | 1379 | return ret; |
1322 | } | 1380 | } |
1323 | 1381 | ||
1382 | /* | ||
1383 | * In case we must use restart_block to restart a futex_wait, | ||
1384 | * we encode in the 'arg3' shared capability | ||
1385 | */ | ||
1386 | #define ARG3_SHARED 1 | ||
1387 | |||
1324 | static long futex_wait_restart(struct restart_block *restart); | 1388 | static long futex_wait_restart(struct restart_block *restart); |
1325 | static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time) | 1389 | static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, |
1390 | u32 val, ktime_t *abs_time) | ||
1326 | { | 1391 | { |
1327 | struct task_struct *curr = current; | 1392 | struct task_struct *curr = current; |
1328 | DECLARE_WAITQUEUE(wait, curr); | 1393 | DECLARE_WAITQUEUE(wait, curr); |
@@ -1335,9 +1400,10 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time) | |||
1335 | 1400 | ||
1336 | q.pi_state = NULL; | 1401 | q.pi_state = NULL; |
1337 | retry: | 1402 | retry: |
1338 | down_read(&curr->mm->mmap_sem); | 1403 | if (fshared) |
1404 | down_read(fshared); | ||
1339 | 1405 | ||
1340 | ret = get_futex_key(uaddr, &q.key); | 1406 | ret = get_futex_key(uaddr, fshared, &q.key); |
1341 | if (unlikely(ret != 0)) | 1407 | if (unlikely(ret != 0)) |
1342 | goto out_release_sem; | 1408 | goto out_release_sem; |
1343 | 1409 | ||
@@ -1360,8 +1426,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time) | |||
1360 | * a wakeup when *uaddr != val on entry to the syscall. This is | 1426 | * a wakeup when *uaddr != val on entry to the syscall. This is |
1361 | * rare, but normal. | 1427 | * rare, but normal. |
1362 | * | 1428 | * |
1363 | * We hold the mmap semaphore, so the mapping cannot have changed | 1429 | * for shared futexes, we hold the mmap semaphore, so the mapping |
1364 | * since we looked it up in get_futex_key. | 1430 | * cannot have changed since we looked it up in get_futex_key. |
1365 | */ | 1431 | */ |
1366 | ret = get_futex_value_locked(&uval, uaddr); | 1432 | ret = get_futex_value_locked(&uval, uaddr); |
1367 | 1433 | ||
@@ -1372,7 +1438,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time) | |||
1372 | * If we would have faulted, release mmap_sem, fault it in and | 1438 | * If we would have faulted, release mmap_sem, fault it in and |
1373 | * start all over again. | 1439 | * start all over again. |
1374 | */ | 1440 | */ |
1375 | up_read(&curr->mm->mmap_sem); | 1441 | if (fshared) |
1442 | up_read(fshared); | ||
1376 | 1443 | ||
1377 | ret = get_user(uval, uaddr); | 1444 | ret = get_user(uval, uaddr); |
1378 | 1445 | ||
@@ -1399,7 +1466,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time) | |||
1399 | * Now the futex is queued and we have checked the data, we | 1466 | * Now the futex is queued and we have checked the data, we |
1400 | * don't want to hold mmap_sem while we sleep. | 1467 | * don't want to hold mmap_sem while we sleep. |
1401 | */ | 1468 | */ |
1402 | up_read(&curr->mm->mmap_sem); | 1469 | if (fshared) |
1470 | up_read(fshared); | ||
1403 | 1471 | ||
1404 | /* | 1472 | /* |
1405 | * There might have been scheduling since the queue_me(), as we | 1473 | * There might have been scheduling since the queue_me(), as we |
@@ -1469,7 +1537,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time) | |||
1469 | else | 1537 | else |
1470 | ret = rt_mutex_timed_lock(lock, to, 1); | 1538 | ret = rt_mutex_timed_lock(lock, to, 1); |
1471 | 1539 | ||
1472 | down_read(&curr->mm->mmap_sem); | 1540 | if (fshared) |
1541 | down_read(fshared); | ||
1473 | spin_lock(q.lock_ptr); | 1542 | spin_lock(q.lock_ptr); |
1474 | 1543 | ||
1475 | /* | 1544 | /* |
@@ -1486,7 +1555,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time) | |||
1486 | 1555 | ||
1487 | /* mmap_sem and hash_bucket lock are unlocked at | 1556 | /* mmap_sem and hash_bucket lock are unlocked at |
1488 | return of this function */ | 1557 | return of this function */ |
1489 | ret = fixup_pi_state_owner(uaddr, &q, hb, curr); | 1558 | ret = fixup_pi_state_owner(uaddr, fshared, |
1559 | &q, hb, curr); | ||
1490 | } else { | 1560 | } else { |
1491 | /* | 1561 | /* |
1492 | * Catch the rare case, where the lock was released | 1562 | * Catch the rare case, where the lock was released |
@@ -1499,7 +1569,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time) | |||
1499 | } | 1569 | } |
1500 | /* Unqueue and drop the lock */ | 1570 | /* Unqueue and drop the lock */ |
1501 | unqueue_me_pi(&q); | 1571 | unqueue_me_pi(&q); |
1502 | up_read(&curr->mm->mmap_sem); | 1572 | if (fshared) |
1573 | up_read(fshared); | ||
1503 | } | 1574 | } |
1504 | 1575 | ||
1505 | debug_rt_mutex_free_waiter(&q.waiter); | 1576 | debug_rt_mutex_free_waiter(&q.waiter); |
@@ -1528,6 +1599,9 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time) | |||
1528 | restart->arg0 = (unsigned long)uaddr; | 1599 | restart->arg0 = (unsigned long)uaddr; |
1529 | restart->arg1 = (unsigned long)val; | 1600 | restart->arg1 = (unsigned long)val; |
1530 | restart->arg2 = (unsigned long)abs_time; | 1601 | restart->arg2 = (unsigned long)abs_time; |
1602 | restart->arg3 = 0; | ||
1603 | if (fshared) | ||
1604 | restart->arg3 |= ARG3_SHARED; | ||
1531 | return -ERESTART_RESTARTBLOCK; | 1605 | return -ERESTART_RESTARTBLOCK; |
1532 | } | 1606 | } |
1533 | 1607 | ||
@@ -1535,7 +1609,8 @@ static int futex_wait(u32 __user *uaddr, u32 val, ktime_t *abs_time) | |||
1535 | queue_unlock(&q, hb); | 1609 | queue_unlock(&q, hb); |
1536 | 1610 | ||
1537 | out_release_sem: | 1611 | out_release_sem: |
1538 | up_read(&curr->mm->mmap_sem); | 1612 | if (fshared) |
1613 | up_read(fshared); | ||
1539 | return ret; | 1614 | return ret; |
1540 | } | 1615 | } |
1541 | 1616 | ||
@@ -1545,9 +1620,12 @@ static long futex_wait_restart(struct restart_block *restart) | |||
1545 | u32 __user *uaddr = (u32 __user *)restart->arg0; | 1620 | u32 __user *uaddr = (u32 __user *)restart->arg0; |
1546 | u32 val = (u32)restart->arg1; | 1621 | u32 val = (u32)restart->arg1; |
1547 | ktime_t *abs_time = (ktime_t *)restart->arg2; | 1622 | ktime_t *abs_time = (ktime_t *)restart->arg2; |
1623 | struct rw_semaphore *fshared = NULL; | ||
1548 | 1624 | ||
1549 | restart->fn = do_no_restart_syscall; | 1625 | restart->fn = do_no_restart_syscall; |
1550 | return (long)futex_wait(uaddr, val, abs_time); | 1626 | if (restart->arg3 & ARG3_SHARED) |
1627 | fshared = &current->mm->mmap_sem; | ||
1628 | return (long)futex_wait(uaddr, fshared, val, abs_time); | ||
1551 | } | 1629 | } |
1552 | 1630 | ||
1553 | 1631 | ||
@@ -1602,8 +1680,8 @@ static void set_pi_futex_owner(struct futex_hash_bucket *hb, | |||
1602 | * if there are waiters then it will block, it does PI, etc. (Due to | 1680 | * if there are waiters then it will block, it does PI, etc. (Due to |
1603 | * races the kernel might see a 0 value of the futex too.) | 1681 | * races the kernel might see a 0 value of the futex too.) |
1604 | */ | 1682 | */ |
1605 | static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time, | 1683 | static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, |
1606 | int trylock) | 1684 | int detect, ktime_t *time, int trylock) |
1607 | { | 1685 | { |
1608 | struct hrtimer_sleeper timeout, *to = NULL; | 1686 | struct hrtimer_sleeper timeout, *to = NULL; |
1609 | struct task_struct *curr = current; | 1687 | struct task_struct *curr = current; |
@@ -1624,9 +1702,10 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time, | |||
1624 | 1702 | ||
1625 | q.pi_state = NULL; | 1703 | q.pi_state = NULL; |
1626 | retry: | 1704 | retry: |
1627 | down_read(&curr->mm->mmap_sem); | 1705 | if (fshared) |
1706 | down_read(fshared); | ||
1628 | 1707 | ||
1629 | ret = get_futex_key(uaddr, &q.key); | 1708 | ret = get_futex_key(uaddr, fshared, &q.key); |
1630 | if (unlikely(ret != 0)) | 1709 | if (unlikely(ret != 0)) |
1631 | goto out_release_sem; | 1710 | goto out_release_sem; |
1632 | 1711 | ||
@@ -1747,7 +1826,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time, | |||
1747 | * Now the futex is queued and we have checked the data, we | 1826 | * Now the futex is queued and we have checked the data, we |
1748 | * don't want to hold mmap_sem while we sleep. | 1827 | * don't want to hold mmap_sem while we sleep. |
1749 | */ | 1828 | */ |
1750 | up_read(&curr->mm->mmap_sem); | 1829 | if (fshared) |
1830 | up_read(fshared); | ||
1751 | 1831 | ||
1752 | WARN_ON(!q.pi_state); | 1832 | WARN_ON(!q.pi_state); |
1753 | /* | 1833 | /* |
@@ -1761,7 +1841,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time, | |||
1761 | ret = ret ? 0 : -EWOULDBLOCK; | 1841 | ret = ret ? 0 : -EWOULDBLOCK; |
1762 | } | 1842 | } |
1763 | 1843 | ||
1764 | down_read(&curr->mm->mmap_sem); | 1844 | if (fshared) |
1845 | down_read(fshared); | ||
1765 | spin_lock(q.lock_ptr); | 1846 | spin_lock(q.lock_ptr); |
1766 | 1847 | ||
1767 | /* | 1848 | /* |
@@ -1770,7 +1851,7 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time, | |||
1770 | */ | 1851 | */ |
1771 | if (!ret && q.pi_state->owner != curr) | 1852 | if (!ret && q.pi_state->owner != curr) |
1772 | /* mmap_sem is unlocked at return of this function */ | 1853 | /* mmap_sem is unlocked at return of this function */ |
1773 | ret = fixup_pi_state_owner(uaddr, &q, hb, curr); | 1854 | ret = fixup_pi_state_owner(uaddr, fshared, &q, hb, curr); |
1774 | else { | 1855 | else { |
1775 | /* | 1856 | /* |
1776 | * Catch the rare case, where the lock was released | 1857 | * Catch the rare case, where the lock was released |
@@ -1783,7 +1864,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time, | |||
1783 | } | 1864 | } |
1784 | /* Unqueue and drop the lock */ | 1865 | /* Unqueue and drop the lock */ |
1785 | unqueue_me_pi(&q); | 1866 | unqueue_me_pi(&q); |
1786 | up_read(&curr->mm->mmap_sem); | 1867 | if (fshared) |
1868 | up_read(fshared); | ||
1787 | } | 1869 | } |
1788 | 1870 | ||
1789 | if (!detect && ret == -EDEADLK && 0) | 1871 | if (!detect && ret == -EDEADLK && 0) |
@@ -1795,7 +1877,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time, | |||
1795 | queue_unlock(&q, hb); | 1877 | queue_unlock(&q, hb); |
1796 | 1878 | ||
1797 | out_release_sem: | 1879 | out_release_sem: |
1798 | up_read(&curr->mm->mmap_sem); | 1880 | if (fshared) |
1881 | up_read(fshared); | ||
1799 | return ret; | 1882 | return ret; |
1800 | 1883 | ||
1801 | uaddr_faulted: | 1884 | uaddr_faulted: |
@@ -1806,15 +1889,16 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time, | |||
1806 | * still holding the mmap_sem. | 1889 | * still holding the mmap_sem. |
1807 | */ | 1890 | */ |
1808 | if (attempt++) { | 1891 | if (attempt++) { |
1809 | if (futex_handle_fault((unsigned long)uaddr, attempt)) { | 1892 | ret = futex_handle_fault((unsigned long)uaddr, fshared, |
1810 | ret = -EFAULT; | 1893 | attempt); |
1894 | if (ret) | ||
1811 | goto out_unlock_release_sem; | 1895 | goto out_unlock_release_sem; |
1812 | } | ||
1813 | goto retry_locked; | 1896 | goto retry_locked; |
1814 | } | 1897 | } |
1815 | 1898 | ||
1816 | queue_unlock(&q, hb); | 1899 | queue_unlock(&q, hb); |
1817 | up_read(&curr->mm->mmap_sem); | 1900 | if (fshared) |
1901 | up_read(fshared); | ||
1818 | 1902 | ||
1819 | ret = get_user(uval, uaddr); | 1903 | ret = get_user(uval, uaddr); |
1820 | if (!ret && (uval != -EFAULT)) | 1904 | if (!ret && (uval != -EFAULT)) |
@@ -1828,7 +1912,7 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, ktime_t *time, | |||
1828 | * This is the in-kernel slowpath: we look up the PI state (if any), | 1912 | * This is the in-kernel slowpath: we look up the PI state (if any), |
1829 | * and do the rt-mutex unlock. | 1913 | * and do the rt-mutex unlock. |
1830 | */ | 1914 | */ |
1831 | static int futex_unlock_pi(u32 __user *uaddr) | 1915 | static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared) |
1832 | { | 1916 | { |
1833 | struct futex_hash_bucket *hb; | 1917 | struct futex_hash_bucket *hb; |
1834 | struct futex_q *this, *next; | 1918 | struct futex_q *this, *next; |
@@ -1848,9 +1932,10 @@ retry: | |||
1848 | /* | 1932 | /* |
1849 | * First take all the futex related locks: | 1933 | * First take all the futex related locks: |
1850 | */ | 1934 | */ |
1851 | down_read(&current->mm->mmap_sem); | 1935 | if (fshared) |
1936 | down_read(fshared); | ||
1852 | 1937 | ||
1853 | ret = get_futex_key(uaddr, &key); | 1938 | ret = get_futex_key(uaddr, fshared, &key); |
1854 | if (unlikely(ret != 0)) | 1939 | if (unlikely(ret != 0)) |
1855 | goto out; | 1940 | goto out; |
1856 | 1941 | ||
@@ -1909,7 +1994,8 @@ retry_locked: | |||
1909 | out_unlock: | 1994 | out_unlock: |
1910 | spin_unlock(&hb->lock); | 1995 | spin_unlock(&hb->lock); |
1911 | out: | 1996 | out: |
1912 | up_read(&current->mm->mmap_sem); | 1997 | if (fshared) |
1998 | up_read(fshared); | ||
1913 | 1999 | ||
1914 | return ret; | 2000 | return ret; |
1915 | 2001 | ||
@@ -1921,15 +2007,16 @@ pi_faulted: | |||
1921 | * still holding the mmap_sem. | 2007 | * still holding the mmap_sem. |
1922 | */ | 2008 | */ |
1923 | if (attempt++) { | 2009 | if (attempt++) { |
1924 | if (futex_handle_fault((unsigned long)uaddr, attempt)) { | 2010 | ret = futex_handle_fault((unsigned long)uaddr, fshared, |
1925 | ret = -EFAULT; | 2011 | attempt); |
2012 | if (ret) | ||
1926 | goto out_unlock; | 2013 | goto out_unlock; |
1927 | } | ||
1928 | goto retry_locked; | 2014 | goto retry_locked; |
1929 | } | 2015 | } |
1930 | 2016 | ||
1931 | spin_unlock(&hb->lock); | 2017 | spin_unlock(&hb->lock); |
1932 | up_read(&current->mm->mmap_sem); | 2018 | if (fshared) |
2019 | up_read(fshared); | ||
1933 | 2020 | ||
1934 | ret = get_user(uval, uaddr); | 2021 | ret = get_user(uval, uaddr); |
1935 | if (!ret && (uval != -EFAULT)) | 2022 | if (!ret && (uval != -EFAULT)) |
@@ -1981,6 +2068,7 @@ static int futex_fd(u32 __user *uaddr, int signal) | |||
1981 | struct futex_q *q; | 2068 | struct futex_q *q; |
1982 | struct file *filp; | 2069 | struct file *filp; |
1983 | int ret, err; | 2070 | int ret, err; |
2071 | struct rw_semaphore *fshared; | ||
1984 | static unsigned long printk_interval; | 2072 | static unsigned long printk_interval; |
1985 | 2073 | ||
1986 | if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { | 2074 | if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { |
@@ -2022,11 +2110,12 @@ static int futex_fd(u32 __user *uaddr, int signal) | |||
2022 | } | 2110 | } |
2023 | q->pi_state = NULL; | 2111 | q->pi_state = NULL; |
2024 | 2112 | ||
2025 | down_read(&current->mm->mmap_sem); | 2113 | fshared = &current->mm->mmap_sem; |
2026 | err = get_futex_key(uaddr, &q->key); | 2114 | down_read(fshared); |
2115 | err = get_futex_key(uaddr, fshared, &q->key); | ||
2027 | 2116 | ||
2028 | if (unlikely(err != 0)) { | 2117 | if (unlikely(err != 0)) { |
2029 | up_read(&current->mm->mmap_sem); | 2118 | up_read(fshared); |
2030 | kfree(q); | 2119 | kfree(q); |
2031 | goto error; | 2120 | goto error; |
2032 | } | 2121 | } |
@@ -2038,7 +2127,7 @@ static int futex_fd(u32 __user *uaddr, int signal) | |||
2038 | filp->private_data = q; | 2127 | filp->private_data = q; |
2039 | 2128 | ||
2040 | queue_me(q, ret, filp); | 2129 | queue_me(q, ret, filp); |
2041 | up_read(&current->mm->mmap_sem); | 2130 | up_read(fshared); |
2042 | 2131 | ||
2043 | /* Now we map fd to filp, so userspace can access it */ | 2132 | /* Now we map fd to filp, so userspace can access it */ |
2044 | fd_install(ret, filp); | 2133 | fd_install(ret, filp); |
@@ -2167,7 +2256,7 @@ retry: | |||
2167 | */ | 2256 | */ |
2168 | if (!pi) { | 2257 | if (!pi) { |
2169 | if (uval & FUTEX_WAITERS) | 2258 | if (uval & FUTEX_WAITERS) |
2170 | futex_wake(uaddr, 1); | 2259 | futex_wake(uaddr, &curr->mm->mmap_sem, 1); |
2171 | } | 2260 | } |
2172 | } | 2261 | } |
2173 | return 0; | 2262 | return 0; |
@@ -2223,7 +2312,8 @@ void exit_robust_list(struct task_struct *curr) | |||
2223 | return; | 2312 | return; |
2224 | 2313 | ||
2225 | if (pending) | 2314 | if (pending) |
2226 | handle_futex_death((void __user *)pending + futex_offset, curr, pip); | 2315 | handle_futex_death((void __user *)pending + futex_offset, |
2316 | curr, pip); | ||
2227 | 2317 | ||
2228 | while (entry != &head->list) { | 2318 | while (entry != &head->list) { |
2229 | /* | 2319 | /* |
@@ -2253,38 +2343,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, | |||
2253 | u32 __user *uaddr2, u32 val2, u32 val3) | 2343 | u32 __user *uaddr2, u32 val2, u32 val3) |
2254 | { | 2344 | { |
2255 | int ret; | 2345 | int ret; |
2346 | int cmd = op & FUTEX_CMD_MASK; | ||
2347 | struct rw_semaphore *fshared = NULL; | ||
2348 | |||
2349 | if (!(op & FUTEX_PRIVATE_FLAG)) | ||
2350 | fshared = &current->mm->mmap_sem; | ||
2256 | 2351 | ||
2257 | switch (op) { | 2352 | switch (cmd) { |
2258 | case FUTEX_WAIT: | 2353 | case FUTEX_WAIT: |
2259 | ret = futex_wait(uaddr, val, timeout); | 2354 | ret = futex_wait(uaddr, fshared, val, timeout); |
2260 | break; | 2355 | break; |
2261 | case FUTEX_WAKE: | 2356 | case FUTEX_WAKE: |
2262 | ret = futex_wake(uaddr, val); | 2357 | ret = futex_wake(uaddr, fshared, val); |
2263 | break; | 2358 | break; |
2264 | case FUTEX_FD: | 2359 | case FUTEX_FD: |
2265 | /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */ | 2360 | /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */ |
2266 | ret = futex_fd(uaddr, val); | 2361 | ret = futex_fd(uaddr, val); |
2267 | break; | 2362 | break; |
2268 | case FUTEX_REQUEUE: | 2363 | case FUTEX_REQUEUE: |
2269 | ret = futex_requeue(uaddr, uaddr2, val, val2, NULL); | 2364 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); |
2270 | break; | 2365 | break; |
2271 | case FUTEX_CMP_REQUEUE: | 2366 | case FUTEX_CMP_REQUEUE: |
2272 | ret = futex_requeue(uaddr, uaddr2, val, val2, &val3); | 2367 | ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3); |
2273 | break; | 2368 | break; |
2274 | case FUTEX_WAKE_OP: | 2369 | case FUTEX_WAKE_OP: |
2275 | ret = futex_wake_op(uaddr, uaddr2, val, val2, val3); | 2370 | ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); |
2276 | break; | 2371 | break; |
2277 | case FUTEX_LOCK_PI: | 2372 | case FUTEX_LOCK_PI: |
2278 | ret = futex_lock_pi(uaddr, val, timeout, 0); | 2373 | ret = futex_lock_pi(uaddr, fshared, val, timeout, 0); |
2279 | break; | 2374 | break; |
2280 | case FUTEX_UNLOCK_PI: | 2375 | case FUTEX_UNLOCK_PI: |
2281 | ret = futex_unlock_pi(uaddr); | 2376 | ret = futex_unlock_pi(uaddr, fshared); |
2282 | break; | 2377 | break; |
2283 | case FUTEX_TRYLOCK_PI: | 2378 | case FUTEX_TRYLOCK_PI: |
2284 | ret = futex_lock_pi(uaddr, 0, timeout, 1); | 2379 | ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); |
2285 | break; | 2380 | break; |
2286 | case FUTEX_CMP_REQUEUE_PI: | 2381 | case FUTEX_CMP_REQUEUE_PI: |
2287 | ret = futex_requeue_pi(uaddr, uaddr2, val, val2, &val3); | 2382 | ret = futex_requeue_pi(uaddr, fshared, uaddr2, val, val2, &val3); |
2288 | break; | 2383 | break; |
2289 | default: | 2384 | default: |
2290 | ret = -ENOSYS; | 2385 | ret = -ENOSYS; |
@@ -2300,23 +2395,24 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, | |||
2300 | struct timespec ts; | 2395 | struct timespec ts; |
2301 | ktime_t t, *tp = NULL; | 2396 | ktime_t t, *tp = NULL; |
2302 | u32 val2 = 0; | 2397 | u32 val2 = 0; |
2398 | int cmd = op & FUTEX_CMD_MASK; | ||
2303 | 2399 | ||
2304 | if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) { | 2400 | if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) { |
2305 | if (copy_from_user(&ts, utime, sizeof(ts)) != 0) | 2401 | if (copy_from_user(&ts, utime, sizeof(ts)) != 0) |
2306 | return -EFAULT; | 2402 | return -EFAULT; |
2307 | if (!timespec_valid(&ts)) | 2403 | if (!timespec_valid(&ts)) |
2308 | return -EINVAL; | 2404 | return -EINVAL; |
2309 | 2405 | ||
2310 | t = timespec_to_ktime(ts); | 2406 | t = timespec_to_ktime(ts); |
2311 | if (op == FUTEX_WAIT) | 2407 | if (cmd == FUTEX_WAIT) |
2312 | t = ktime_add(ktime_get(), t); | 2408 | t = ktime_add(ktime_get(), t); |
2313 | tp = &t; | 2409 | tp = &t; |
2314 | } | 2410 | } |
2315 | /* | 2411 | /* |
2316 | * requeue parameter in 'utime' if op == FUTEX_REQUEUE. | 2412 | * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. |
2317 | */ | 2413 | */ |
2318 | if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE | 2414 | if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE |
2319 | || op == FUTEX_CMP_REQUEUE_PI) | 2415 | || cmd == FUTEX_CMP_REQUEUE_PI) |
2320 | val2 = (u32) (unsigned long) utime; | 2416 | val2 = (u32) (unsigned long) utime; |
2321 | 2417 | ||
2322 | return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); | 2418 | return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); |