diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-09-26 13:32:20 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-09-30 06:35:20 -0400 |
commit | 38d47c1b7075bd7ec3881141bb3629da58f88dab (patch) | |
tree | 44c6ddc753738e69c908706a6b0cad04a7b59f95 | |
parent | 94aca1dac6f6d21f4b07e4864baf7768cabcc6e7 (diff) |
futex: rely on get_user_pages() for shared futexes
As a first step toward getting rid of the mmap_sem requirement for shared futexes,
start by relying on get_user_pages(): derive the futex key from the returned page rather than from the VMA.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/linux/futex.h | 2 | ||||
-rw-r--r-- | kernel/futex.c | 162 |
2 files changed, 82 insertions, 82 deletions
diff --git a/include/linux/futex.h b/include/linux/futex.h index 586ab56a3ec3..8f627b9ae2b1 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h | |||
@@ -164,6 +164,8 @@ union futex_key { | |||
164 | } both; | 164 | } both; |
165 | }; | 165 | }; |
166 | 166 | ||
167 | #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } | ||
168 | |||
167 | #ifdef CONFIG_FUTEX | 169 | #ifdef CONFIG_FUTEX |
168 | extern void exit_robust_list(struct task_struct *curr); | 170 | extern void exit_robust_list(struct task_struct *curr); |
169 | extern void exit_pi_state_list(struct task_struct *curr); | 171 | extern void exit_pi_state_list(struct task_struct *curr); |
diff --git a/kernel/futex.c b/kernel/futex.c index 7d1136e97c14..a4c39fa0a7a3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -161,6 +161,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2) | |||
161 | && key1->both.offset == key2->both.offset); | 161 | && key1->both.offset == key2->both.offset); |
162 | } | 162 | } |
163 | 163 | ||
164 | /* | ||
165 | * Take a reference to the resource addressed by a key. | ||
166 | * Can be called while holding spinlocks. | ||
167 | * | ||
168 | */ | ||
169 | static void get_futex_key_refs(union futex_key *key) | ||
170 | { | ||
171 | if (!key->both.ptr) | ||
172 | return; | ||
173 | |||
174 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | ||
175 | case FUT_OFF_INODE: | ||
176 | atomic_inc(&key->shared.inode->i_count); | ||
177 | break; | ||
178 | case FUT_OFF_MMSHARED: | ||
179 | atomic_inc(&key->private.mm->mm_count); | ||
180 | break; | ||
181 | } | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * Drop a reference to the resource addressed by a key. | ||
186 | * The hash bucket spinlock must not be held. | ||
187 | */ | ||
188 | static void drop_futex_key_refs(union futex_key *key) | ||
189 | { | ||
190 | if (!key->both.ptr) | ||
191 | return; | ||
192 | |||
193 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | ||
194 | case FUT_OFF_INODE: | ||
195 | iput(key->shared.inode); | ||
196 | break; | ||
197 | case FUT_OFF_MMSHARED: | ||
198 | mmdrop(key->private.mm); | ||
199 | break; | ||
200 | } | ||
201 | } | ||
202 | |||
164 | /** | 203 | /** |
165 | * get_futex_key - Get parameters which are the keys for a futex. | 204 | * get_futex_key - Get parameters which are the keys for a futex. |
166 | * @uaddr: virtual address of the futex | 205 | * @uaddr: virtual address of the futex |
@@ -184,7 +223,6 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
184 | { | 223 | { |
185 | unsigned long address = (unsigned long)uaddr; | 224 | unsigned long address = (unsigned long)uaddr; |
186 | struct mm_struct *mm = current->mm; | 225 | struct mm_struct *mm = current->mm; |
187 | struct vm_area_struct *vma; | ||
188 | struct page *page; | 226 | struct page *page; |
189 | int err; | 227 | int err; |
190 | 228 | ||
@@ -210,98 +248,47 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
210 | key->private.address = address; | 248 | key->private.address = address; |
211 | return 0; | 249 | return 0; |
212 | } | 250 | } |
213 | /* | ||
214 | * The futex is hashed differently depending on whether | ||
215 | * it's in a shared or private mapping. So check vma first. | ||
216 | */ | ||
217 | vma = find_extend_vma(mm, address); | ||
218 | if (unlikely(!vma)) | ||
219 | return -EFAULT; | ||
220 | 251 | ||
221 | /* | 252 | again: |
222 | * Permissions. | 253 | err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL); |
223 | */ | 254 | if (err < 0) |
224 | if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) | 255 | return err; |
225 | return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; | 256 | |
257 | lock_page(page); | ||
258 | if (!page->mapping) { | ||
259 | unlock_page(page); | ||
260 | put_page(page); | ||
261 | goto again; | ||
262 | } | ||
226 | 263 | ||
227 | /* | 264 | /* |
228 | * Private mappings are handled in a simple way. | 265 | * Private mappings are handled in a simple way. |
229 | * | 266 | * |
230 | * NOTE: When userspace waits on a MAP_SHARED mapping, even if | 267 | * NOTE: When userspace waits on a MAP_SHARED mapping, even if |
231 | * it's a read-only handle, it's expected that futexes attach to | 268 | * it's a read-only handle, it's expected that futexes attach to |
232 | * the object not the particular process. Therefore we use | 269 | * the object not the particular process. |
233 | * VM_MAYSHARE here, not VM_SHARED which is restricted to shared | ||
234 | * mappings of _writable_ handles. | ||
235 | */ | 270 | */ |
236 | if (likely(!(vma->vm_flags & VM_MAYSHARE))) { | 271 | if (PageAnon(page)) { |
237 | key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */ | 272 | key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ |
238 | key->private.mm = mm; | 273 | key->private.mm = mm; |
239 | key->private.address = address; | 274 | key->private.address = address; |
240 | return 0; | 275 | } else { |
241 | } | 276 | key->both.offset |= FUT_OFF_INODE; /* inode-based key */ |
242 | 277 | key->shared.inode = page->mapping->host; | |
243 | /* | 278 | key->shared.pgoff = page->index; |
244 | * Linear file mappings are also simple. | ||
245 | */ | ||
246 | key->shared.inode = vma->vm_file->f_path.dentry->d_inode; | ||
247 | key->both.offset |= FUT_OFF_INODE; /* inode-based key. */ | ||
248 | if (likely(!(vma->vm_flags & VM_NONLINEAR))) { | ||
249 | key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) | ||
250 | + vma->vm_pgoff); | ||
251 | return 0; | ||
252 | } | 279 | } |
253 | 280 | ||
254 | /* | 281 | get_futex_key_refs(key); |
255 | * We could walk the page table to read the non-linear | ||
256 | * pte, and get the page index without fetching the page | ||
257 | * from swap. But that's a lot of code to duplicate here | ||
258 | * for a rare case, so we simply fetch the page. | ||
259 | */ | ||
260 | err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL); | ||
261 | if (err >= 0) { | ||
262 | key->shared.pgoff = | ||
263 | page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
264 | put_page(page); | ||
265 | return 0; | ||
266 | } | ||
267 | return err; | ||
268 | } | ||
269 | 282 | ||
270 | /* | 283 | unlock_page(page); |
271 | * Take a reference to the resource addressed by a key. | 284 | put_page(page); |
272 | * Can be called while holding spinlocks. | 285 | return 0; |
273 | * | ||
274 | */ | ||
275 | static void get_futex_key_refs(union futex_key *key) | ||
276 | { | ||
277 | if (key->both.ptr == NULL) | ||
278 | return; | ||
279 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | ||
280 | case FUT_OFF_INODE: | ||
281 | atomic_inc(&key->shared.inode->i_count); | ||
282 | break; | ||
283 | case FUT_OFF_MMSHARED: | ||
284 | atomic_inc(&key->private.mm->mm_count); | ||
285 | break; | ||
286 | } | ||
287 | } | 286 | } |
288 | 287 | ||
289 | /* | 288 | static inline |
290 | * Drop a reference to the resource addressed by a key. | 289 | void put_futex_key(struct rw_semaphore *fshared, union futex_key *key) |
291 | * The hash bucket spinlock must not be held. | ||
292 | */ | ||
293 | static void drop_futex_key_refs(union futex_key *key) | ||
294 | { | 290 | { |
295 | if (!key->both.ptr) | 291 | drop_futex_key_refs(key); |
296 | return; | ||
297 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | ||
298 | case FUT_OFF_INODE: | ||
299 | iput(key->shared.inode); | ||
300 | break; | ||
301 | case FUT_OFF_MMSHARED: | ||
302 | mmdrop(key->private.mm); | ||
303 | break; | ||
304 | } | ||
305 | } | 292 | } |
306 | 293 | ||
307 | static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) | 294 | static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) |
@@ -385,6 +372,7 @@ static int refill_pi_state_cache(void) | |||
385 | /* pi_mutex gets initialized later */ | 372 | /* pi_mutex gets initialized later */ |
386 | pi_state->owner = NULL; | 373 | pi_state->owner = NULL; |
387 | atomic_set(&pi_state->refcount, 1); | 374 | atomic_set(&pi_state->refcount, 1); |
375 | pi_state->key = FUTEX_KEY_INIT; | ||
388 | 376 | ||
389 | current->pi_state_cache = pi_state; | 377 | current->pi_state_cache = pi_state; |
390 | 378 | ||
@@ -462,7 +450,7 @@ void exit_pi_state_list(struct task_struct *curr) | |||
462 | struct list_head *next, *head = &curr->pi_state_list; | 450 | struct list_head *next, *head = &curr->pi_state_list; |
463 | struct futex_pi_state *pi_state; | 451 | struct futex_pi_state *pi_state; |
464 | struct futex_hash_bucket *hb; | 452 | struct futex_hash_bucket *hb; |
465 | union futex_key key; | 453 | union futex_key key = FUTEX_KEY_INIT; |
466 | 454 | ||
467 | if (!futex_cmpxchg_enabled) | 455 | if (!futex_cmpxchg_enabled) |
468 | return; | 456 | return; |
@@ -725,7 +713,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
725 | struct futex_hash_bucket *hb; | 713 | struct futex_hash_bucket *hb; |
726 | struct futex_q *this, *next; | 714 | struct futex_q *this, *next; |
727 | struct plist_head *head; | 715 | struct plist_head *head; |
728 | union futex_key key; | 716 | union futex_key key = FUTEX_KEY_INIT; |
729 | int ret; | 717 | int ret; |
730 | 718 | ||
731 | if (!bitset) | 719 | if (!bitset) |
@@ -760,6 +748,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
760 | 748 | ||
761 | spin_unlock(&hb->lock); | 749 | spin_unlock(&hb->lock); |
762 | out: | 750 | out: |
751 | put_futex_key(fshared, &key); | ||
763 | futex_unlock_mm(fshared); | 752 | futex_unlock_mm(fshared); |
764 | return ret; | 753 | return ret; |
765 | } | 754 | } |
@@ -773,7 +762,7 @@ futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, | |||
773 | u32 __user *uaddr2, | 762 | u32 __user *uaddr2, |
774 | int nr_wake, int nr_wake2, int op) | 763 | int nr_wake, int nr_wake2, int op) |
775 | { | 764 | { |
776 | union futex_key key1, key2; | 765 | union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; |
777 | struct futex_hash_bucket *hb1, *hb2; | 766 | struct futex_hash_bucket *hb1, *hb2; |
778 | struct plist_head *head; | 767 | struct plist_head *head; |
779 | struct futex_q *this, *next; | 768 | struct futex_q *this, *next; |
@@ -873,6 +862,8 @@ retry: | |||
873 | if (hb1 != hb2) | 862 | if (hb1 != hb2) |
874 | spin_unlock(&hb2->lock); | 863 | spin_unlock(&hb2->lock); |
875 | out: | 864 | out: |
865 | put_futex_key(fshared, &key2); | ||
866 | put_futex_key(fshared, &key1); | ||
876 | futex_unlock_mm(fshared); | 867 | futex_unlock_mm(fshared); |
877 | 868 | ||
878 | return ret; | 869 | return ret; |
@@ -886,7 +877,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, | |||
886 | u32 __user *uaddr2, | 877 | u32 __user *uaddr2, |
887 | int nr_wake, int nr_requeue, u32 *cmpval) | 878 | int nr_wake, int nr_requeue, u32 *cmpval) |
888 | { | 879 | { |
889 | union futex_key key1, key2; | 880 | union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; |
890 | struct futex_hash_bucket *hb1, *hb2; | 881 | struct futex_hash_bucket *hb1, *hb2; |
891 | struct plist_head *head1; | 882 | struct plist_head *head1; |
892 | struct futex_q *this, *next; | 883 | struct futex_q *this, *next; |
@@ -974,6 +965,8 @@ out_unlock: | |||
974 | drop_futex_key_refs(&key1); | 965 | drop_futex_key_refs(&key1); |
975 | 966 | ||
976 | out: | 967 | out: |
968 | put_futex_key(fshared, &key2); | ||
969 | put_futex_key(fshared, &key1); | ||
977 | futex_unlock_mm(fshared); | 970 | futex_unlock_mm(fshared); |
978 | return ret; | 971 | return ret; |
979 | } | 972 | } |
@@ -1220,6 +1213,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1220 | retry: | 1213 | retry: |
1221 | futex_lock_mm(fshared); | 1214 | futex_lock_mm(fshared); |
1222 | 1215 | ||
1216 | q.key = FUTEX_KEY_INIT; | ||
1223 | ret = get_futex_key(uaddr, fshared, &q.key); | 1217 | ret = get_futex_key(uaddr, fshared, &q.key); |
1224 | if (unlikely(ret != 0)) | 1218 | if (unlikely(ret != 0)) |
1225 | goto out_release_sem; | 1219 | goto out_release_sem; |
@@ -1360,6 +1354,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1360 | queue_unlock(&q, hb); | 1354 | queue_unlock(&q, hb); |
1361 | 1355 | ||
1362 | out_release_sem: | 1356 | out_release_sem: |
1357 | put_futex_key(fshared, &q.key); | ||
1363 | futex_unlock_mm(fshared); | 1358 | futex_unlock_mm(fshared); |
1364 | return ret; | 1359 | return ret; |
1365 | } | 1360 | } |
@@ -1411,6 +1406,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1411 | retry: | 1406 | retry: |
1412 | futex_lock_mm(fshared); | 1407 | futex_lock_mm(fshared); |
1413 | 1408 | ||
1409 | q.key = FUTEX_KEY_INIT; | ||
1414 | ret = get_futex_key(uaddr, fshared, &q.key); | 1410 | ret = get_futex_key(uaddr, fshared, &q.key); |
1415 | if (unlikely(ret != 0)) | 1411 | if (unlikely(ret != 0)) |
1416 | goto out_release_sem; | 1412 | goto out_release_sem; |
@@ -1625,6 +1621,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
1625 | queue_unlock(&q, hb); | 1621 | queue_unlock(&q, hb); |
1626 | 1622 | ||
1627 | out_release_sem: | 1623 | out_release_sem: |
1624 | put_futex_key(fshared, &q.key); | ||
1628 | futex_unlock_mm(fshared); | 1625 | futex_unlock_mm(fshared); |
1629 | if (to) | 1626 | if (to) |
1630 | destroy_hrtimer_on_stack(&to->timer); | 1627 | destroy_hrtimer_on_stack(&to->timer); |
@@ -1671,7 +1668,7 @@ static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared) | |||
1671 | struct futex_q *this, *next; | 1668 | struct futex_q *this, *next; |
1672 | u32 uval; | 1669 | u32 uval; |
1673 | struct plist_head *head; | 1670 | struct plist_head *head; |
1674 | union futex_key key; | 1671 | union futex_key key = FUTEX_KEY_INIT; |
1675 | int ret, attempt = 0; | 1672 | int ret, attempt = 0; |
1676 | 1673 | ||
1677 | retry: | 1674 | retry: |
@@ -1744,6 +1741,7 @@ retry_unlocked: | |||
1744 | out_unlock: | 1741 | out_unlock: |
1745 | spin_unlock(&hb->lock); | 1742 | spin_unlock(&hb->lock); |
1746 | out: | 1743 | out: |
1744 | put_futex_key(fshared, &key); | ||
1747 | futex_unlock_mm(fshared); | 1745 | futex_unlock_mm(fshared); |
1748 | 1746 | ||
1749 | return ret; | 1747 | return ret; |