aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/futex.c
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2009-06-11 17:15:43 -0400
committerThomas Gleixner <tglx@linutronix.de>2009-06-24 15:27:35 -0400
commitd0725992c8a6fb63a16bc9e8b2a50094cc4db3cd (patch)
tree55b52c5bcc051c3b8c2fc3122000925541d5707b /kernel/futex.c
parentc82e6d450fda56cb2d4f68534173d3cd11b32f9f (diff)
futex: Fix the write access fault problem for real
commit 64d1304a64 (futex: setup writeable mapping for futex ops which modify user space data) did address only half of the problem of write access faults. The patch was made on two wrong assumptions: 1) access_ok(VERIFY_WRITE,...) would actually check write access. On x86 it does _NOT_. It's a pure address range check. 2) a RW mapped region can not go away under us. That's wrong as well. Nobody can prevent another thread to call mprotect(PROT_READ) on that region where the futex resides. If that call hits between the get_user_pages_fast() verification and the actual write access in the atomic region we are toast again. The solution is to not rely on access_ok and get_user() for any write access related fault on private and shared futexes. Instead we need to fault it in with verification of write access. There is no generic non destructive write mechanism which would fault the user page in trough a #PF, but as we already know that we will fault we can as well call get_user_pages() directly and avoid the #PF overhead. If get_user_pages() returns -EFAULT we know that we can not fix it anymore and need to bail out to user space. Remove a bunch of confusing comments on this issue as well. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: stable@kernel.org
Diffstat (limited to 'kernel/futex.c')
-rw-r--r--kernel/futex.c45
1 files changed, 24 insertions, 21 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 80b5ce716596..1c337112335c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -284,6 +284,25 @@ void put_futex_key(int fshared, union futex_key *key)
284 drop_futex_key_refs(key); 284 drop_futex_key_refs(key);
285} 285}
286 286
287/*
288 * fault_in_user_writeable - fault in user address and verify RW access
289 * @uaddr: pointer to faulting user space address
290 *
291 * Slow path to fixup the fault we just took in the atomic write
292 * access to @uaddr.
293 *
294 * We have no generic implementation of a non destructive write to the
295 * user address. We know that we faulted in the atomic pagefault
296 * disabled section so we can as well avoid the #PF overhead by
297 * calling get_user_pages() right away.
298 */
299static int fault_in_user_writeable(u32 __user *uaddr)
300{
301 int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
302 sizeof(*uaddr), 1, 0, NULL, NULL);
303 return ret < 0 ? ret : 0;
304}
305
287/** 306/**
288 * futex_top_waiter() - Return the highest priority waiter on a futex 307 * futex_top_waiter() - Return the highest priority waiter on a futex
289 * @hb: the hash bucket the futex_q's reside in 308 * @hb: the hash bucket the futex_q's reside in
@@ -896,7 +915,6 @@ retry:
896retry_private: 915retry_private:
897 op_ret = futex_atomic_op_inuser(op, uaddr2); 916 op_ret = futex_atomic_op_inuser(op, uaddr2);
898 if (unlikely(op_ret < 0)) { 917 if (unlikely(op_ret < 0)) {
899 u32 dummy;
900 918
901 double_unlock_hb(hb1, hb2); 919 double_unlock_hb(hb1, hb2);
902 920
@@ -914,7 +932,7 @@ retry_private:
914 goto out_put_keys; 932 goto out_put_keys;
915 } 933 }
916 934
917 ret = get_user(dummy, uaddr2); 935 ret = fault_in_user_writeable(uaddr2);
918 if (ret) 936 if (ret)
919 goto out_put_keys; 937 goto out_put_keys;
920 938
@@ -1204,7 +1222,7 @@ retry_private:
1204 double_unlock_hb(hb1, hb2); 1222 double_unlock_hb(hb1, hb2);
1205 put_futex_key(fshared, &key2); 1223 put_futex_key(fshared, &key2);
1206 put_futex_key(fshared, &key1); 1224 put_futex_key(fshared, &key1);
1207 ret = get_user(curval2, uaddr2); 1225 ret = fault_in_user_writeable(uaddr2);
1208 if (!ret) 1226 if (!ret)
1209 goto retry; 1227 goto retry;
1210 goto out; 1228 goto out;
@@ -1482,7 +1500,7 @@ retry:
1482handle_fault: 1500handle_fault:
1483 spin_unlock(q->lock_ptr); 1501 spin_unlock(q->lock_ptr);
1484 1502
1485 ret = get_user(uval, uaddr); 1503 ret = fault_in_user_writeable(uaddr);
1486 1504
1487 spin_lock(q->lock_ptr); 1505 spin_lock(q->lock_ptr);
1488 1506
@@ -1807,7 +1825,6 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
1807{ 1825{
1808 struct hrtimer_sleeper timeout, *to = NULL; 1826 struct hrtimer_sleeper timeout, *to = NULL;
1809 struct futex_hash_bucket *hb; 1827 struct futex_hash_bucket *hb;
1810 u32 uval;
1811 struct futex_q q; 1828 struct futex_q q;
1812 int res, ret; 1829 int res, ret;
1813 1830
@@ -1909,16 +1926,9 @@ out:
1909 return ret != -EINTR ? ret : -ERESTARTNOINTR; 1926 return ret != -EINTR ? ret : -ERESTARTNOINTR;
1910 1927
1911uaddr_faulted: 1928uaddr_faulted:
1912 /*
1913 * We have to r/w *(int __user *)uaddr, and we have to modify it
1914 * atomically. Therefore, if we continue to fault after get_user()
1915 * below, we need to handle the fault ourselves, while still holding
1916 * the mmap_sem. This can occur if the uaddr is under contention as
1917 * we have to drop the mmap_sem in order to call get_user().
1918 */
1919 queue_unlock(&q, hb); 1929 queue_unlock(&q, hb);
1920 1930
1921 ret = get_user(uval, uaddr); 1931 ret = fault_in_user_writeable(uaddr);
1922 if (ret) 1932 if (ret)
1923 goto out_put_key; 1933 goto out_put_key;
1924 1934
@@ -2013,17 +2023,10 @@ out:
2013 return ret; 2023 return ret;
2014 2024
2015pi_faulted: 2025pi_faulted:
2016 /*
2017 * We have to r/w *(int __user *)uaddr, and we have to modify it
2018 * atomically. Therefore, if we continue to fault after get_user()
2019 * below, we need to handle the fault ourselves, while still holding
2020 * the mmap_sem. This can occur if the uaddr is under contention as
2021 * we have to drop the mmap_sem in order to call get_user().
2022 */
2023 spin_unlock(&hb->lock); 2026 spin_unlock(&hb->lock);
2024 put_futex_key(fshared, &key); 2027 put_futex_key(fshared, &key);
2025 2028
2026 ret = get_user(uval, uaddr); 2029 ret = fault_in_user_writeable(uaddr);
2027 if (!ret) 2030 if (!ret)
2028 goto retry; 2031 goto retry;
2029 2032