about summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
author Peter Zijlstra <a.p.zijlstra@chello.nl> 2008-09-26 13:32:20 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-09-30 06:35:20 -0400
commit 38d47c1b7075bd7ec3881141bb3629da58f88dab (patch)
tree 44c6ddc753738e69c908706a6b0cad04a7b59f95 /kernel
parent 94aca1dac6f6d21f4b07e4864baf7768cabcc6e7 (diff)
futex: rely on get_user_pages() for shared futexes
On the way of getting rid of the mmap_sem requirement for shared futexes,
start by relying on get_user_pages().

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/futex.c 162
1 files changed, 80 insertions, 82 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 7d1136e97c14..a4c39fa0a7a3 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -161,6 +161,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
161 && key1->both.offset == key2->both.offset); 161 && key1->both.offset == key2->both.offset);
162} 162}
163 163
164/*
165 * Take a reference to the resource addressed by a key.
166 * Can be called while holding spinlocks.
167 *
168 */
169static void get_futex_key_refs(union futex_key *key)
170{
171 if (!key->both.ptr)
172 return;
173
174 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
175 case FUT_OFF_INODE:
176 atomic_inc(&key->shared.inode->i_count);
177 break;
178 case FUT_OFF_MMSHARED:
179 atomic_inc(&key->private.mm->mm_count);
180 break;
181 }
182}
183
184/*
185 * Drop a reference to the resource addressed by a key.
186 * The hash bucket spinlock must not be held.
187 */
188static void drop_futex_key_refs(union futex_key *key)
189{
190 if (!key->both.ptr)
191 return;
192
193 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
194 case FUT_OFF_INODE:
195 iput(key->shared.inode);
196 break;
197 case FUT_OFF_MMSHARED:
198 mmdrop(key->private.mm);
199 break;
200 }
201}
202
164/** 203/**
165 * get_futex_key - Get parameters which are the keys for a futex. 204 * get_futex_key - Get parameters which are the keys for a futex.
166 * @uaddr: virtual address of the futex 205 * @uaddr: virtual address of the futex
@@ -184,7 +223,6 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
184{ 223{
185 unsigned long address = (unsigned long)uaddr; 224 unsigned long address = (unsigned long)uaddr;
186 struct mm_struct *mm = current->mm; 225 struct mm_struct *mm = current->mm;
187 struct vm_area_struct *vma;
188 struct page *page; 226 struct page *page;
189 int err; 227 int err;
190 228
@@ -210,98 +248,47 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
210 key->private.address = address; 248 key->private.address = address;
211 return 0; 249 return 0;
212 } 250 }
213 /*
214 * The futex is hashed differently depending on whether
215 * it's in a shared or private mapping. So check vma first.
216 */
217 vma = find_extend_vma(mm, address);
218 if (unlikely(!vma))
219 return -EFAULT;
220 251
221 /* 252again:
222 * Permissions. 253 err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
223 */ 254 if (err < 0)
224 if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) 255 return err;
225 return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; 256
257 lock_page(page);
258 if (!page->mapping) {
259 unlock_page(page);
260 put_page(page);
261 goto again;
262 }
226 263
227 /* 264 /*
228 * Private mappings are handled in a simple way. 265 * Private mappings are handled in a simple way.
229 * 266 *
230 * NOTE: When userspace waits on a MAP_SHARED mapping, even if 267 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
231 * it's a read-only handle, it's expected that futexes attach to 268 * it's a read-only handle, it's expected that futexes attach to
232 * the object not the particular process. Therefore we use 269 * the object not the particular process.
233 * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
234 * mappings of _writable_ handles.
235 */ 270 */
236 if (likely(!(vma->vm_flags & VM_MAYSHARE))) { 271 if (PageAnon(page)) {
237 key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */ 272 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
238 key->private.mm = mm; 273 key->private.mm = mm;
239 key->private.address = address; 274 key->private.address = address;
240 return 0; 275 } else {
241 } 276 key->both.offset |= FUT_OFF_INODE; /* inode-based key */
242 277 key->shared.inode = page->mapping->host;
243 /* 278 key->shared.pgoff = page->index;
244 * Linear file mappings are also simple.
245 */
246 key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
247 key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
248 if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
249 key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
250 + vma->vm_pgoff);
251 return 0;
252 } 279 }
253 280
254 /* 281 get_futex_key_refs(key);
255 * We could walk the page table to read the non-linear
256 * pte, and get the page index without fetching the page
257 * from swap. But that's a lot of code to duplicate here
258 * for a rare case, so we simply fetch the page.
259 */
260 err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
261 if (err >= 0) {
262 key->shared.pgoff =
263 page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
264 put_page(page);
265 return 0;
266 }
267 return err;
268}
269 282
270/* 283 unlock_page(page);
271 * Take a reference to the resource addressed by a key. 284 put_page(page);
272 * Can be called while holding spinlocks. 285 return 0;
273 *
274 */
275static void get_futex_key_refs(union futex_key *key)
276{
277 if (key->both.ptr == NULL)
278 return;
279 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
280 case FUT_OFF_INODE:
281 atomic_inc(&key->shared.inode->i_count);
282 break;
283 case FUT_OFF_MMSHARED:
284 atomic_inc(&key->private.mm->mm_count);
285 break;
286 }
287} 286}
288 287
289/* 288static inline
290 * Drop a reference to the resource addressed by a key. 289void put_futex_key(struct rw_semaphore *fshared, union futex_key *key)
291 * The hash bucket spinlock must not be held.
292 */
293static void drop_futex_key_refs(union futex_key *key)
294{ 290{
295 if (!key->both.ptr) 291 drop_futex_key_refs(key);
296 return;
297 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
298 case FUT_OFF_INODE:
299 iput(key->shared.inode);
300 break;
301 case FUT_OFF_MMSHARED:
302 mmdrop(key->private.mm);
303 break;
304 }
305} 292}
306 293
307static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) 294static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
@@ -385,6 +372,7 @@ static int refill_pi_state_cache(void)
385 /* pi_mutex gets initialized later */ 372 /* pi_mutex gets initialized later */
386 pi_state->owner = NULL; 373 pi_state->owner = NULL;
387 atomic_set(&pi_state->refcount, 1); 374 atomic_set(&pi_state->refcount, 1);
375 pi_state->key = FUTEX_KEY_INIT;
388 376
389 current->pi_state_cache = pi_state; 377 current->pi_state_cache = pi_state;
390 378
@@ -462,7 +450,7 @@ void exit_pi_state_list(struct task_struct *curr)
462 struct list_head *next, *head = &curr->pi_state_list; 450 struct list_head *next, *head = &curr->pi_state_list;
463 struct futex_pi_state *pi_state; 451 struct futex_pi_state *pi_state;
464 struct futex_hash_bucket *hb; 452 struct futex_hash_bucket *hb;
465 union futex_key key; 453 union futex_key key = FUTEX_KEY_INIT;
466 454
467 if (!futex_cmpxchg_enabled) 455 if (!futex_cmpxchg_enabled)
468 return; 456 return;
@@ -725,7 +713,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
725 struct futex_hash_bucket *hb; 713 struct futex_hash_bucket *hb;
726 struct futex_q *this, *next; 714 struct futex_q *this, *next;
727 struct plist_head *head; 715 struct plist_head *head;
728 union futex_key key; 716 union futex_key key = FUTEX_KEY_INIT;
729 int ret; 717 int ret;
730 718
731 if (!bitset) 719 if (!bitset)
@@ -760,6 +748,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
760 748
761 spin_unlock(&hb->lock); 749 spin_unlock(&hb->lock);
762out: 750out:
751 put_futex_key(fshared, &key);
763 futex_unlock_mm(fshared); 752 futex_unlock_mm(fshared);
764 return ret; 753 return ret;
765} 754}
@@ -773,7 +762,7 @@ futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
773 u32 __user *uaddr2, 762 u32 __user *uaddr2,
774 int nr_wake, int nr_wake2, int op) 763 int nr_wake, int nr_wake2, int op)
775{ 764{
776 union futex_key key1, key2; 765 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
777 struct futex_hash_bucket *hb1, *hb2; 766 struct futex_hash_bucket *hb1, *hb2;
778 struct plist_head *head; 767 struct plist_head *head;
779 struct futex_q *this, *next; 768 struct futex_q *this, *next;
@@ -873,6 +862,8 @@ retry:
873 if (hb1 != hb2) 862 if (hb1 != hb2)
874 spin_unlock(&hb2->lock); 863 spin_unlock(&hb2->lock);
875out: 864out:
865 put_futex_key(fshared, &key2);
866 put_futex_key(fshared, &key1);
876 futex_unlock_mm(fshared); 867 futex_unlock_mm(fshared);
877 868
878 return ret; 869 return ret;
@@ -886,7 +877,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
886 u32 __user *uaddr2, 877 u32 __user *uaddr2,
887 int nr_wake, int nr_requeue, u32 *cmpval) 878 int nr_wake, int nr_requeue, u32 *cmpval)
888{ 879{
889 union futex_key key1, key2; 880 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
890 struct futex_hash_bucket *hb1, *hb2; 881 struct futex_hash_bucket *hb1, *hb2;
891 struct plist_head *head1; 882 struct plist_head *head1;
892 struct futex_q *this, *next; 883 struct futex_q *this, *next;
@@ -974,6 +965,8 @@ out_unlock:
974 drop_futex_key_refs(&key1); 965 drop_futex_key_refs(&key1);
975 966
976out: 967out:
968 put_futex_key(fshared, &key2);
969 put_futex_key(fshared, &key1);
977 futex_unlock_mm(fshared); 970 futex_unlock_mm(fshared);
978 return ret; 971 return ret;
979} 972}
@@ -1220,6 +1213,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1220 retry: 1213 retry:
1221 futex_lock_mm(fshared); 1214 futex_lock_mm(fshared);
1222 1215
1216 q.key = FUTEX_KEY_INIT;
1223 ret = get_futex_key(uaddr, fshared, &q.key); 1217 ret = get_futex_key(uaddr, fshared, &q.key);
1224 if (unlikely(ret != 0)) 1218 if (unlikely(ret != 0))
1225 goto out_release_sem; 1219 goto out_release_sem;
@@ -1360,6 +1354,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1360 queue_unlock(&q, hb); 1354 queue_unlock(&q, hb);
1361 1355
1362 out_release_sem: 1356 out_release_sem:
1357 put_futex_key(fshared, &q.key);
1363 futex_unlock_mm(fshared); 1358 futex_unlock_mm(fshared);
1364 return ret; 1359 return ret;
1365} 1360}
@@ -1411,6 +1406,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1411 retry: 1406 retry:
1412 futex_lock_mm(fshared); 1407 futex_lock_mm(fshared);
1413 1408
1409 q.key = FUTEX_KEY_INIT;
1414 ret = get_futex_key(uaddr, fshared, &q.key); 1410 ret = get_futex_key(uaddr, fshared, &q.key);
1415 if (unlikely(ret != 0)) 1411 if (unlikely(ret != 0))
1416 goto out_release_sem; 1412 goto out_release_sem;
@@ -1625,6 +1621,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1625 queue_unlock(&q, hb); 1621 queue_unlock(&q, hb);
1626 1622
1627 out_release_sem: 1623 out_release_sem:
1624 put_futex_key(fshared, &q.key);
1628 futex_unlock_mm(fshared); 1625 futex_unlock_mm(fshared);
1629 if (to) 1626 if (to)
1630 destroy_hrtimer_on_stack(&to->timer); 1627 destroy_hrtimer_on_stack(&to->timer);
@@ -1671,7 +1668,7 @@ static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
1671 struct futex_q *this, *next; 1668 struct futex_q *this, *next;
1672 u32 uval; 1669 u32 uval;
1673 struct plist_head *head; 1670 struct plist_head *head;
1674 union futex_key key; 1671 union futex_key key = FUTEX_KEY_INIT;
1675 int ret, attempt = 0; 1672 int ret, attempt = 0;
1676 1673
1677retry: 1674retry:
@@ -1744,6 +1741,7 @@ retry_unlocked:
1744out_unlock: 1741out_unlock:
1745 spin_unlock(&hb->lock); 1742 spin_unlock(&hb->lock);
1746out: 1743out:
1744 put_futex_key(fshared, &key);
1747 futex_unlock_mm(fshared); 1745 futex_unlock_mm(fshared);
1748 1746
1749 return ret; 1747 return ret;