aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/futex.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- kernel/futex.c 415
1 files changed, 9 insertions, 406 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 3b7f7713d9a4..df248f5e0836 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -56,12 +56,6 @@
56 56
57#include "rtmutex_common.h" 57#include "rtmutex_common.h"
58 58
59#ifdef CONFIG_DEBUG_RT_MUTEXES
60# include "rtmutex-debug.h"
61#else
62# include "rtmutex.h"
63#endif
64
65#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) 59#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
66 60
67/* 61/*
@@ -111,12 +105,6 @@ struct futex_q {
111 /* Optional priority inheritance state: */ 105 /* Optional priority inheritance state: */
112 struct futex_pi_state *pi_state; 106 struct futex_pi_state *pi_state;
113 struct task_struct *task; 107 struct task_struct *task;
114
115 /*
116 * This waiter is used in case of requeue from a
117 * normal futex to a PI-futex
118 */
119 struct rt_mutex_waiter waiter;
120}; 108};
121 109
122/* 110/*
@@ -216,9 +204,6 @@ int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
216 if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) 204 if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
217 return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; 205 return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
218 206
219 /* Save the user address in the key */
220 key->uaddr = uaddr;
221
222 /* 207 /*
223 * Private mappings are handled in a simple way. 208 * Private mappings are handled in a simple way.
224 * 209 *
@@ -636,8 +621,6 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
636 int ret = 0; 621 int ret = 0;
637 622
638 newval = FUTEX_WAITERS | new_owner->pid; 623 newval = FUTEX_WAITERS | new_owner->pid;
639 /* Keep the FUTEX_WAITER_REQUEUED flag if it was set */
640 newval |= (uval & FUTEX_WAITER_REQUEUED);
641 624
642 pagefault_disable(); 625 pagefault_disable();
643 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); 626 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
@@ -750,259 +733,6 @@ out:
750} 733}
751 734
752/* 735/*
753 * Called from futex_requeue_pi.
754 * Set FUTEX_WAITERS and FUTEX_WAITER_REQUEUED flags on the
755 * PI-futex value; search its associated pi_state if an owner exists
756 * or create a new one without owner.
757 */
758static inline int
759lookup_pi_state_for_requeue(u32 __user *uaddr, struct futex_hash_bucket *hb,
760 union futex_key *key,
761 struct futex_pi_state **pi_state)
762{
763 u32 curval, uval, newval;
764
765retry:
766 /*
767 * We can't handle a fault cleanly because we can't
768 * release the locks here. Simply return the fault.
769 */
770 if (get_futex_value_locked(&curval, uaddr))
771 return -EFAULT;
772
773 /* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */
774 if ((curval & (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED))
775 != (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED)) {
776 /*
777 * No waiters yet, we prepare the futex to have some waiters.
778 */
779
780 uval = curval;
781 newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
782
783 pagefault_disable();
784 curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
785 pagefault_enable();
786
787 if (unlikely(curval == -EFAULT))
788 return -EFAULT;
789 if (unlikely(curval != uval))
790 goto retry;
791 }
792
793 if (!(curval & FUTEX_TID_MASK)
794 || lookup_pi_state(curval, hb, key, pi_state)) {
795 /* the futex has no owner (yet) or the lookup failed:
796 allocate one pi_state without owner */
797
798 *pi_state = alloc_pi_state();
799
800 /* Already stores the key: */
801 (*pi_state)->key = *key;
802
803 /* init the mutex without owner */
804 __rt_mutex_init(&(*pi_state)->pi_mutex, NULL);
805 }
806
807 return 0;
808}
809
810/*
811 * Keep the first nr_wake waiter from futex1, wake up one,
812 * and requeue the next nr_requeue waiters following hashed on
813 * one physical page to another physical page (PI-futex uaddr2)
814 */
815static int futex_requeue_pi(u32 __user *uaddr1,
816 struct rw_semaphore *fshared,
817 u32 __user *uaddr2,
818 int nr_wake, int nr_requeue, u32 *cmpval)
819{
820 union futex_key key1, key2;
821 struct futex_hash_bucket *hb1, *hb2;
822 struct plist_head *head1;
823 struct futex_q *this, *next;
824 struct futex_pi_state *pi_state2 = NULL;
825 struct rt_mutex_waiter *waiter, *top_waiter = NULL;
826 struct rt_mutex *lock2 = NULL;
827 int ret, drop_count = 0;
828
829 if (refill_pi_state_cache())
830 return -ENOMEM;
831
832retry:
833 /*
834 * First take all the futex related locks:
835 */
836 if (fshared)
837 down_read(fshared);
838
839 ret = get_futex_key(uaddr1, fshared, &key1);
840 if (unlikely(ret != 0))
841 goto out;
842 ret = get_futex_key(uaddr2, fshared, &key2);
843 if (unlikely(ret != 0))
844 goto out;
845
846 hb1 = hash_futex(&key1);
847 hb2 = hash_futex(&key2);
848
849 double_lock_hb(hb1, hb2);
850
851 if (likely(cmpval != NULL)) {
852 u32 curval;
853
854 ret = get_futex_value_locked(&curval, uaddr1);
855
856 if (unlikely(ret)) {
857 spin_unlock(&hb1->lock);
858 if (hb1 != hb2)
859 spin_unlock(&hb2->lock);
860
861 /*
862 * If we would have faulted, release mmap_sem, fault
863 * it in and start all over again.
864 */
865 if (fshared)
866 up_read(fshared);
867
868 ret = get_user(curval, uaddr1);
869
870 if (!ret)
871 goto retry;
872
873 return ret;
874 }
875 if (curval != *cmpval) {
876 ret = -EAGAIN;
877 goto out_unlock;
878 }
879 }
880
881 head1 = &hb1->chain;
882 plist_for_each_entry_safe(this, next, head1, list) {
883 if (!match_futex (&this->key, &key1))
884 continue;
885 if (++ret <= nr_wake) {
886 wake_futex(this);
887 } else {
888 /*
889 * FIRST: get and set the pi_state
890 */
891 if (!pi_state2) {
892 int s;
893 /* do this only the first time we requeue someone */
894 s = lookup_pi_state_for_requeue(uaddr2, hb2,
895 &key2, &pi_state2);
896 if (s) {
897 ret = s;
898 goto out_unlock;
899 }
900
901 lock2 = &pi_state2->pi_mutex;
902 spin_lock(&lock2->wait_lock);
903
904 /* Save the top waiter of the wait_list */
905 if (rt_mutex_has_waiters(lock2))
906 top_waiter = rt_mutex_top_waiter(lock2);
907 } else
908 atomic_inc(&pi_state2->refcount);
909
910
911 this->pi_state = pi_state2;
912
913 /*
914 * SECOND: requeue futex_q to the correct hashbucket
915 */
916
917 /*
918 * If key1 and key2 hash to the same bucket, no need to
919 * requeue.
920 */
921 if (likely(head1 != &hb2->chain)) {
922 plist_del(&this->list, &hb1->chain);
923 plist_add(&this->list, &hb2->chain);
924 this->lock_ptr = &hb2->lock;
925#ifdef CONFIG_DEBUG_PI_LIST
926 this->list.plist.lock = &hb2->lock;
927#endif
928 }
929 this->key = key2;
930 get_futex_key_refs(&key2);
931 drop_count++;
932
933
934 /*
935 * THIRD: queue it to lock2
936 */
937 spin_lock_irq(&this->task->pi_lock);
938 waiter = &this->waiter;
939 waiter->task = this->task;
940 waiter->lock = lock2;
941 plist_node_init(&waiter->list_entry, this->task->prio);
942 plist_node_init(&waiter->pi_list_entry, this->task->prio);
943 plist_add(&waiter->list_entry, &lock2->wait_list);
944 this->task->pi_blocked_on = waiter;
945 spin_unlock_irq(&this->task->pi_lock);
946
947 if (ret - nr_wake >= nr_requeue)
948 break;
949 }
950 }
951
952 /* If we've requeued some tasks and the top_waiter of the rt_mutex
953 has changed, we must adjust the priority of the owner, if any */
954 if (drop_count) {
955 struct task_struct *owner = rt_mutex_owner(lock2);
956 if (owner &&
957 (top_waiter != (waiter = rt_mutex_top_waiter(lock2)))) {
958 int chain_walk = 0;
959
960 spin_lock_irq(&owner->pi_lock);
961 if (top_waiter)
962 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
963 else
964 /*
965 * There were no waiters before the requeue,
966 * the flag must be updated
967 */
968 mark_rt_mutex_waiters(lock2);
969
970 plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
971 __rt_mutex_adjust_prio(owner);
972 if (owner->pi_blocked_on) {
973 chain_walk = 1;
974 get_task_struct(owner);
975 }
976
977 spin_unlock_irq(&owner->pi_lock);
978 spin_unlock(&lock2->wait_lock);
979
980 if (chain_walk)
981 rt_mutex_adjust_prio_chain(owner, 0, lock2, NULL,
982 current);
983 } else {
984 /* No owner or the top_waiter does not change */
985 mark_rt_mutex_waiters(lock2);
986 spin_unlock(&lock2->wait_lock);
987 }
988 }
989
990out_unlock:
991 spin_unlock(&hb1->lock);
992 if (hb1 != hb2)
993 spin_unlock(&hb2->lock);
994
995 /* drop_futex_key_refs() must be called outside the spinlocks. */
996 while (--drop_count >= 0)
997 drop_futex_key_refs(&key1);
998
999out:
1000 if (fshared)
1001 up_read(fshared);
1002 return ret;
1003}
1004
1005/*
1006 * Wake up all waiters hashed on the physical page that is mapped 736 * Wake up all waiters hashed on the physical page that is mapped
1007 * to this virtual address: 737 * to this virtual address:
1008 */ 738 */
@@ -1384,7 +1114,6 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1384 1114
1385 while (!ret) { 1115 while (!ret) {
1386 newval = (uval & FUTEX_OWNER_DIED) | newtid; 1116 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1387 newval |= (uval & FUTEX_WAITER_REQUEUED);
1388 1117
1389 pagefault_disable(); 1118 pagefault_disable();
1390 curval = futex_atomic_cmpxchg_inatomic(uaddr, 1119 curval = futex_atomic_cmpxchg_inatomic(uaddr,
@@ -1416,7 +1145,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1416 struct futex_q q; 1145 struct futex_q q;
1417 u32 uval; 1146 u32 uval;
1418 int ret; 1147 int ret;
1419 struct hrtimer_sleeper t, *to = NULL; 1148 struct hrtimer_sleeper t;
1420 int rem = 0; 1149 int rem = 0;
1421 1150
1422 q.pi_state = NULL; 1151 q.pi_state = NULL;
@@ -1472,14 +1201,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1472 if (uval != val) 1201 if (uval != val)
1473 goto out_unlock_release_sem; 1202 goto out_unlock_release_sem;
1474 1203
1475 /*
1476 * This rt_mutex_waiter structure is prepared here and will
1477 * be used only if this task is requeued from a normal futex to
1478 * a PI-futex with futex_requeue_pi.
1479 */
1480 debug_rt_mutex_init_waiter(&q.waiter);
1481 q.waiter.task = NULL;
1482
1483 /* Only actually queue if *uaddr contained val. */ 1204 /* Only actually queue if *uaddr contained val. */
1484 __queue_me(&q, hb); 1205 __queue_me(&q, hb);
1485 1206
@@ -1510,7 +1231,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1510 if (!abs_time) 1231 if (!abs_time)
1511 schedule(); 1232 schedule();
1512 else { 1233 else {
1513 to = &t;
1514 hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 1234 hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1515 hrtimer_init_sleeper(&t, current); 1235 hrtimer_init_sleeper(&t, current);
1516 t.timer.expires = *abs_time; 1236 t.timer.expires = *abs_time;
@@ -1538,67 +1258,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1538 * we are the only user of it. 1258 * we are the only user of it.
1539 */ 1259 */
1540 1260
1541 if (q.pi_state) {
1542 /*
1543 * We were woken but have been requeued on a PI-futex.
1544 * We have to complete the lock acquisition by taking
1545 * the rtmutex.
1546 */
1547
1548 struct rt_mutex *lock = &q.pi_state->pi_mutex;
1549
1550 spin_lock(&lock->wait_lock);
1551 if (unlikely(q.waiter.task)) {
1552 remove_waiter(lock, &q.waiter);
1553 }
1554 spin_unlock(&lock->wait_lock);
1555
1556 if (rem)
1557 ret = -ETIMEDOUT;
1558 else
1559 ret = rt_mutex_timed_lock(lock, to, 1);
1560
1561 if (fshared)
1562 down_read(fshared);
1563 spin_lock(q.lock_ptr);
1564
1565 /*
1566 * Got the lock. We might not be the anticipated owner if we
1567 * did a lock-steal - fix up the PI-state in that case.
1568 */
1569 if (!ret && q.pi_state->owner != curr) {
1570 /*
1571 * We MUST play with the futex we were requeued on,
1572 * NOT the current futex.
1573 * We can retrieve it from the key of the pi_state
1574 */
1575 uaddr = q.pi_state->key.uaddr;
1576
1577 ret = fixup_pi_state_owner(uaddr, &q, curr);
1578 } else {
1579 /*
1580 * Catch the rare case, where the lock was released
1581 * when we were on the way back before we locked
1582 * the hash bucket.
1583 */
1584 if (ret && q.pi_state->owner == curr) {
1585 if (rt_mutex_trylock(&q.pi_state->pi_mutex))
1586 ret = 0;
1587 }
1588 }
1589
1590 /* Unqueue and drop the lock */
1591 unqueue_me_pi(&q);
1592 if (fshared)
1593 up_read(fshared);
1594
1595 debug_rt_mutex_free_waiter(&q.waiter);
1596
1597 return ret;
1598 }
1599
1600 debug_rt_mutex_free_waiter(&q.waiter);
1601
1602 /* If we were woken (and unqueued), we succeeded, whatever. */ 1261 /* If we were woken (and unqueued), we succeeded, whatever. */
1603 if (!unqueue_me(&q)) 1262 if (!unqueue_me(&q))
1604 return 0; 1263 return 0;
@@ -1648,51 +1307,6 @@ static long futex_wait_restart(struct restart_block *restart)
1648} 1307}
1649 1308
1650 1309
1651static void set_pi_futex_owner(struct futex_hash_bucket *hb,
1652 union futex_key *key, struct task_struct *p)
1653{
1654 struct plist_head *head;
1655 struct futex_q *this, *next;
1656 struct futex_pi_state *pi_state = NULL;
1657 struct rt_mutex *lock;
1658
1659 /* Search for a waiter that should already exist */
1660
1661 head = &hb->chain;
1662
1663 plist_for_each_entry_safe(this, next, head, list) {
1664 if (match_futex (&this->key, key)) {
1665 pi_state = this->pi_state;
1666 break;
1667 }
1668 }
1669
1670 BUG_ON(!pi_state);
1671
1672 /* set p as pi_state's owner */
1673 lock = &pi_state->pi_mutex;
1674
1675 spin_lock(&lock->wait_lock);
1676 spin_lock_irq(&p->pi_lock);
1677
1678 list_add(&pi_state->list, &p->pi_state_list);
1679 pi_state->owner = p;
1680
1681
1682 /* set p as pi_mutex's owner */
1683 debug_rt_mutex_proxy_lock(lock, p);
1684 WARN_ON(rt_mutex_owner(lock));
1685 rt_mutex_set_owner(lock, p, 0);
1686 rt_mutex_deadlock_account_lock(lock, p);
1687
1688 plist_add(&rt_mutex_top_waiter(lock)->pi_list_entry,
1689 &p->pi_waiters);
1690 __rt_mutex_adjust_prio(p);
1691
1692 spin_unlock_irq(&p->pi_lock);
1693 spin_unlock(&lock->wait_lock);
1694}
1695
1696/* 1310/*
1697 * Userspace tried a 0 -> TID atomic transition of the futex value 1311 * Userspace tried a 0 -> TID atomic transition of the futex value
1698 * and failed. The kernel side here does the whole locking operation: 1312 * and failed. The kernel side here does the whole locking operation:
@@ -1753,8 +1367,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1753 * situation and we return success to user space. 1367 * situation and we return success to user space.
1754 */ 1368 */
1755 if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) { 1369 if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
1756 if (!(curval & FUTEX_WAITER_REQUEUED)) 1370 ret = -EDEADLK;
1757 ret = -EDEADLK;
1758 goto out_unlock_release_sem; 1371 goto out_unlock_release_sem;
1759 } 1372 }
1760 1373
@@ -1774,14 +1387,14 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1774 1387
1775 /* 1388 /*
1776 * There are two cases, where a futex might have no owner (the 1389 * There are two cases, where a futex might have no owner (the
1777 * owner TID is 0): OWNER_DIED or REQUEUE. We take over the 1390 * owner TID is 0): OWNER_DIED. We take over the futex in this
1778 * futex in this case. We also do an unconditional take over, 1391 * case. We also do an unconditional take over, when the owner
1779 * when the owner of the futex died. 1392 * of the futex died.
1780 * 1393 *
1781 * This is safe as we are protected by the hash bucket lock ! 1394 * This is safe as we are protected by the hash bucket lock !
1782 */ 1395 */
1783 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { 1396 if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
1784 /* Keep the OWNER_DIED and REQUEUE bits */ 1397 /* Keep the OWNER_DIED bit */
1785 newval = (curval & ~FUTEX_TID_MASK) | current->pid; 1398 newval = (curval & ~FUTEX_TID_MASK) | current->pid;
1786 ownerdied = 0; 1399 ownerdied = 0;
1787 lock_taken = 1; 1400 lock_taken = 1;
@@ -1797,14 +1410,10 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1797 goto retry_locked; 1410 goto retry_locked;
1798 1411
1799 /* 1412 /*
1800 * We took the lock due to requeue or owner died take over. 1413 * We took the lock due to owner died take over.
1801 */ 1414 */
1802 if (unlikely(lock_taken)) { 1415 if (unlikely(lock_taken))
1803 /* For requeue we need to fixup the pi_futex */
1804 if (curval & FUTEX_WAITER_REQUEUED)
1805 set_pi_futex_owner(hb, &q.key, curr);
1806 goto out_unlock_release_sem; 1416 goto out_unlock_release_sem;
1807 }
1808 1417
1809 /* 1418 /*
1809 * We don't have the lock. Look up the PI state (or create it if 1419 * We don't have the lock. Look up the PI state (or create it if
@@ -2289,8 +1898,6 @@ retry:
2289 * userspace. 1898 * userspace.
2290 */ 1899 */
2291 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; 1900 mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
2292 /* Also keep the FUTEX_WAITER_REQUEUED flag if set */
2293 mval |= (uval & FUTEX_WAITER_REQUEUED);
2294 nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); 1901 nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
2295 1902
2296 if (nval == -EFAULT) 1903 if (nval == -EFAULT)
@@ -2427,9 +2034,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2427 case FUTEX_TRYLOCK_PI: 2034 case FUTEX_TRYLOCK_PI:
2428 ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); 2035 ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
2429 break; 2036 break;
2430 case FUTEX_CMP_REQUEUE_PI:
2431 ret = futex_requeue_pi(uaddr, fshared, uaddr2, val, val2, &val3);
2432 break;
2433 default: 2037 default:
2434 ret = -ENOSYS; 2038 ret = -ENOSYS;
2435 } 2039 }
@@ -2460,8 +2064,7 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
2460 /* 2064 /*
2461 * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. 2065 * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE.
2462 */ 2066 */
2463 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE 2067 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
2464 || cmd == FUTEX_CMP_REQUEUE_PI)
2465 val2 = (u32) (unsigned long) utime; 2068 val2 = (u32) (unsigned long) utime;
2466 2069
2467 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); 2070 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);