aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/futex.h8
-rw-r--r--include/linux/thread_info.h3
-rw-r--r--kernel/futex.c519
3 files changed, 510 insertions, 20 deletions
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 3bf5bb5a34f9..b05519ca9e57 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -23,6 +23,9 @@ union ktime;
23#define FUTEX_TRYLOCK_PI 8 23#define FUTEX_TRYLOCK_PI 8
24#define FUTEX_WAIT_BITSET 9 24#define FUTEX_WAIT_BITSET 9
25#define FUTEX_WAKE_BITSET 10 25#define FUTEX_WAKE_BITSET 10
26#define FUTEX_WAIT_REQUEUE_PI 11
27#define FUTEX_REQUEUE_PI 12
28#define FUTEX_CMP_REQUEUE_PI 13
26 29
27#define FUTEX_PRIVATE_FLAG 128 30#define FUTEX_PRIVATE_FLAG 128
28#define FUTEX_CLOCK_REALTIME 256 31#define FUTEX_CLOCK_REALTIME 256
@@ -38,6 +41,11 @@ union ktime;
38#define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) 41#define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
39#define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) 42#define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG)
40#define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) 43#define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG)
44#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \
45 FUTEX_PRIVATE_FLAG)
46#define FUTEX_REQUEUE_PI_PRIVATE (FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG)
47#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \
48 FUTEX_PRIVATE_FLAG)
41 49
42/* 50/*
43 * Support for robust futexes: the kernel cleans up held futexes at 51 * Support for robust futexes: the kernel cleans up held futexes at
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index e6b820f8b56b..a8cc4e13434c 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -21,13 +21,14 @@ struct restart_block {
21 struct { 21 struct {
22 unsigned long arg0, arg1, arg2, arg3; 22 unsigned long arg0, arg1, arg2, arg3;
23 }; 23 };
24 /* For futex_wait */ 24 /* For futex_wait and futex_wait_requeue_pi */
25 struct { 25 struct {
26 u32 *uaddr; 26 u32 *uaddr;
27 u32 val; 27 u32 val;
28 u32 flags; 28 u32 flags;
29 u32 bitset; 29 u32 bitset;
30 u64 time; 30 u64 time;
31 u32 *uaddr2;
31 } futex; 32 } futex;
32 /* For nanosleep */ 33 /* For nanosleep */
33 struct { 34 struct {
diff --git a/kernel/futex.c b/kernel/futex.c
index dbe857aa4381..185c981d89e3 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -19,6 +19,10 @@
19 * PRIVATE futexes by Eric Dumazet 19 * PRIVATE futexes by Eric Dumazet
20 * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com> 20 * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
21 * 21 *
22 * Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
23 * Copyright (C) IBM Corporation, 2009
24 * Thanks to Thomas Gleixner for conceptual design and careful reviews.
25 *
22 * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly 26 * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
23 * enough at me, Linus for the original (flawed) idea, Matthew 27 * enough at me, Linus for the original (flawed) idea, Matthew
24 * Kirkwood for proof-of-concept implementation. 28 * Kirkwood for proof-of-concept implementation.
@@ -109,6 +113,9 @@ struct futex_q {
109 struct futex_pi_state *pi_state; 113 struct futex_pi_state *pi_state;
110 struct task_struct *task; 114 struct task_struct *task;
111 115
116 /* rt_waiter storage for requeue_pi: */
117 struct rt_mutex_waiter *rt_waiter;
118
112 /* Bitset for the optional bitmasked wakeup */ 119 /* Bitset for the optional bitmasked wakeup */
113 u32 bitset; 120 u32 bitset;
114}; 121};
@@ -827,7 +834,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
827 834
828 plist_for_each_entry_safe(this, next, head, list) { 835 plist_for_each_entry_safe(this, next, head, list) {
829 if (match_futex (&this->key, &key)) { 836 if (match_futex (&this->key, &key)) {
830 if (this->pi_state) { 837 if (this->pi_state || this->rt_waiter) {
831 ret = -EINVAL; 838 ret = -EINVAL;
832 break; 839 break;
833 } 840 }
@@ -968,20 +975,138 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
968 q->key = *key2; 975 q->key = *key2;
969} 976}
970 977
971/* 978/**
972 * Requeue all waiters hashed on one physical page to another 979 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
973 * physical page. 980 * @q: the futex_q
981 * @key: the key of the requeue target futex
982 *
983 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
984 * target futex if it is uncontended or via a lock steal. Set the futex_q key
985 * to the requeue target futex so the waiter can detect the wakeup on the right
986 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
987 * atomic lock acquisition. Must be called with the q->lock_ptr held.
988 */
989static inline
990void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
991{
992 drop_futex_key_refs(&q->key); /* q is leaving its current key ... */
993 get_futex_key_refs(key); /* ... and is re-keyed to the requeue target */
994 q->key = *key; /* lets the waiter detect the wakeup on the right futex */
995
996 WARN_ON(plist_node_empty(&q->list));
997 plist_del(&q->list, &q->list.plist); /* remove q from the hash bucket */
998
999 WARN_ON(!q->rt_waiter);
1000 q->rt_waiter = NULL; /* NULL rt_waiter == lock was acquired atomically for the waiter */
1001
1002 wake_up(&q->waiter);
1003}
1004
1005/**
1006 * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
1007 * @pifutex: the user address of the to futex
1008 * @hb1: the from futex hash bucket, must be locked by the caller
1009 * @hb2: the to futex hash bucket, must be locked by the caller
1010 * @key1: the from futex key
1011 * @key2: the to futex key
1012 *
1013 * Try and get the lock on behalf of the top waiter if we can do it atomically.
1014 * Wake the top waiter if we succeed. hb1 and hb2 must be held by the caller. In contended cases the pi_state is returned through the ps argument.
1015 *
1016 * Returns:
1017 * 0 - failed to acquire the lock atomically (also returned when there is no waiter)
1018 * 1 - acquired the lock
1019 * <0 - error
1020 */
1021static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1022 struct futex_hash_bucket *hb1,
1023 struct futex_hash_bucket *hb2,
1024 union futex_key *key1, union futex_key *key2,
1025 struct futex_pi_state **ps)
1026{
1027 struct futex_q *top_waiter;
1028 u32 curval;
1029 int ret;
1030
1031 if (get_futex_value_locked(&curval, pifutex)) /* curval is not used afterwards; only the fault result matters here */
1032 return -EFAULT;
1033
1034 top_waiter = futex_top_waiter(hb1, key1);
1035
1036 /* There are no waiters, nothing for us to do. */
1037 if (!top_waiter)
1038 return 0;
1039
1040 /*
1041 * Either take the lock for top_waiter or set the FUTEX_WAITERS bit.
1042 * The pi_state is returned in ps in contended cases.
1043 */
1044 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task);
1045 if (ret == 1) /* lock taken on behalf of top_waiter: re-key it to key2 and wake it */
1046 requeue_pi_wake_futex(top_waiter, key2);
1047
1048 return ret;
1049}
1050
1051/**
1052 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
1053 * @uaddr1: source futex user address
1054 * @uaddr2: target futex user address
1055 * @nr_wake: number of waiters to wake (must be 1 for requeue_pi)
1056 * @nr_requeue: number of waiters to requeue (0-INT_MAX)
1057 * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
1058 * pi futex (pi to pi requeue is not supported)
1059 *
1060 * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
1061 * uaddr2 atomically on behalf of the top waiter.
1062 *
1063 * Returns:
1064 * >=0 - on success, the number of tasks requeued or woken
1065 * <0 - on error
974 */ 1066 */
975static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, 1067static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
976 int nr_wake, int nr_requeue, u32 *cmpval) 1068 int nr_wake, int nr_requeue, u32 *cmpval,
1069 int requeue_pi)
977{ 1070{
978 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; 1071 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
1072 int drop_count = 0, task_count = 0, ret;
1073 struct futex_pi_state *pi_state = NULL;
979 struct futex_hash_bucket *hb1, *hb2; 1074 struct futex_hash_bucket *hb1, *hb2;
980 struct plist_head *head1; 1075 struct plist_head *head1;
981 struct futex_q *this, *next; 1076 struct futex_q *this, *next;
982 int ret, drop_count = 0; 1077 u32 curval2;
1078
1079 if (requeue_pi) {
1080 /*
1081 * requeue_pi requires a pi_state, try to allocate it now
1082 * without any locks in case it fails.
1083 */
1084 if (refill_pi_state_cache())
1085 return -ENOMEM;
1086 /*
1087 * requeue_pi must wake as many tasks as it can, up to nr_wake
1088 * + nr_requeue, since it acquires the rt_mutex prior to
1089 * returning to userspace, so as to not leave the rt_mutex with
1090 * waiters and no owner. However, second and third wake-ups
1091 * cannot be predicted as they involve race conditions with the
1092 * first wake and a fault while looking up the pi_state. Both
1093 * pthread_cond_signal() and pthread_cond_broadcast() should
1094 * use nr_wake=1.
1095 */
1096 if (nr_wake != 1)
1097 return -EINVAL;
1098 }
983 1099
984retry: 1100retry:
1101 if (pi_state != NULL) {
1102 /*
1103 * We will have to lookup the pi_state again, so free this one
1104 * to keep the accounting correct.
1105 */
1106 free_pi_state(pi_state);
1107 pi_state = NULL;
1108 }
1109
985 ret = get_futex_key(uaddr1, fshared, &key1); 1110 ret = get_futex_key(uaddr1, fshared, &key1);
986 if (unlikely(ret != 0)) 1111 if (unlikely(ret != 0))
987 goto out; 1112 goto out;
@@ -1020,19 +1145,94 @@ retry_private:
1020 } 1145 }
1021 } 1146 }
1022 1147
1148 if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
1149 /* Attempt to acquire uaddr2 and wake the top_waiter. */
1150 ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
1151 &key2, &pi_state);
1152
1153 /*
1154 * At this point the top_waiter has either taken uaddr2 or is
1155 * waiting on it. If the former, then the pi_state will not
1156 * exist yet, look it up one more time to ensure we have a
1157 * reference to it.
1158 */
1159 if (ret == 1) {
1160 WARN_ON(pi_state);
1161 task_count++;
1162 ret = get_futex_value_locked(&curval2, uaddr2);
1163 if (!ret)
1164 ret = lookup_pi_state(curval2, hb2, &key2,
1165 &pi_state);
1166 }
1167
1168 switch (ret) {
1169 case 0:
1170 break;
1171 case -EFAULT:
1172 double_unlock_hb(hb1, hb2);
1173 put_futex_key(fshared, &key2);
1174 put_futex_key(fshared, &key1);
1175 ret = get_user(curval2, uaddr2);
1176 if (!ret)
1177 goto retry;
1178 goto out;
1179 case -EAGAIN:
1180 /* The owner was exiting, try again. */
1181 double_unlock_hb(hb1, hb2);
1182 put_futex_key(fshared, &key2);
1183 put_futex_key(fshared, &key1);
1184 cond_resched();
1185 goto retry;
1186 default:
1187 goto out_unlock;
1188 }
1189 }
1190
1023 head1 = &hb1->chain; 1191 head1 = &hb1->chain;
1024 plist_for_each_entry_safe(this, next, head1, list) { 1192 plist_for_each_entry_safe(this, next, head1, list) {
1025 if (!match_futex (&this->key, &key1)) 1193 if (task_count - nr_wake >= nr_requeue)
1194 break;
1195
1196 if (!match_futex(&this->key, &key1))
1026 continue; 1197 continue;
1027 if (++ret <= nr_wake) { 1198
1199 WARN_ON(!requeue_pi && this->rt_waiter);
1200 WARN_ON(requeue_pi && !this->rt_waiter);
1201
1202 /*
1203 * Wake nr_wake waiters. For requeue_pi, if we acquired the
1204 * lock, we already woke the top_waiter. If not, it will be
1205 * woken by futex_unlock_pi().
1206 */
1207 if (++task_count <= nr_wake && !requeue_pi) {
1028 wake_futex(this); 1208 wake_futex(this);
1029 } else { 1209 continue;
1030 requeue_futex(this, hb1, hb2, &key2); 1210 }
1031 drop_count++;
1032 1211
1033 if (ret - nr_wake >= nr_requeue) 1212 /*
1034 break; 1213 * Requeue nr_requeue waiters and possibly one more in the case
1214 * of requeue_pi if we couldn't acquire the lock atomically.
1215 */
1216 if (requeue_pi) {
1217 /* Prepare the waiter to take the rt_mutex. */
1218 atomic_inc(&pi_state->refcount);
1219 this->pi_state = pi_state;
1220 ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
1221 this->rt_waiter,
1222 this->task, 1);
1223 if (ret == 1) {
1224 /* We got the lock. */
1225 requeue_pi_wake_futex(this, &key2);
1226 continue;
1227 } else if (ret) {
1228 /* -EDEADLK */
1229 this->pi_state = NULL;
1230 free_pi_state(pi_state);
1231 goto out_unlock;
1232 }
1035 } 1233 }
1234 requeue_futex(this, hb1, hb2, &key2);
1235 drop_count++;
1036 } 1236 }
1037 1237
1038out_unlock: 1238out_unlock:
@@ -1047,7 +1247,9 @@ out_put_keys:
1047out_put_key1: 1247out_put_key1:
1048 put_futex_key(fshared, &key1); 1248 put_futex_key(fshared, &key1);
1049out: 1249out:
1050 return ret; 1250 if (pi_state != NULL)
1251 free_pi_state(pi_state);
1252 return ret ? ret : task_count;
1051} 1253}
1052 1254
1053/* The key must be already stored in q->key. */ 1255/* The key must be already stored in q->key. */
@@ -1270,6 +1472,7 @@ handle_fault:
1270#define FLAGS_HAS_TIMEOUT 0x04 1472#define FLAGS_HAS_TIMEOUT 0x04
1271 1473
1272static long futex_wait_restart(struct restart_block *restart); 1474static long futex_wait_restart(struct restart_block *restart);
1475static long futex_lock_pi_restart(struct restart_block *restart);
1273 1476
1274/** 1477/**
1275 * fixup_owner() - Post lock pi_state and corner case management 1478 * fixup_owner() - Post lock pi_state and corner case management
@@ -1489,6 +1692,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
1489 1692
1490 q.pi_state = NULL; 1693 q.pi_state = NULL;
1491 q.bitset = bitset; 1694 q.bitset = bitset;
1695 q.rt_waiter = NULL;
1492 1696
1493 if (abs_time) { 1697 if (abs_time) {
1494 to = &timeout; 1698 to = &timeout;
@@ -1596,6 +1800,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
1596 } 1800 }
1597 1801
1598 q.pi_state = NULL; 1802 q.pi_state = NULL;
1803 q.rt_waiter = NULL;
1599retry: 1804retry:
1600 q.key = FUTEX_KEY_INIT; 1805 q.key = FUTEX_KEY_INIT;
1601 ret = get_futex_key(uaddr, fshared, &q.key); 1806 ret = get_futex_key(uaddr, fshared, &q.key);
@@ -1701,6 +1906,20 @@ uaddr_faulted:
1701 goto retry; 1906 goto retry;
1702} 1907}
1703 1908
1909static long futex_lock_pi_restart(struct restart_block *restart)
1910{
1911 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; /* stored by futex_wait_requeue_pi() as uaddr2 */
1912 ktime_t t, *tp = NULL;
1913 int fshared = restart->futex.flags & FLAGS_SHARED;
1914
1915 if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
1916 t.tv64 = restart->futex.time; /* absolute timeout saved from abs_time->tv64 */
1917 tp = &t;
1918 }
1919 restart->fn = do_no_restart_syscall; /* NOTE(review): presumably prevents this block from being restarted a second time - confirm */
1920
1921 return (long)futex_lock_pi(uaddr, fshared, restart->futex.val, tp, 0);
1922}
1704 1923
1705/* 1924/*
1706 * Userspace attempted a TID -> 0 atomic transition, and failed. 1925 * Userspace attempted a TID -> 0 atomic transition, and failed.
@@ -1803,6 +2022,253 @@ pi_faulted:
1803 return ret; 2022 return ret;
1804} 2023}
1805 2024
2025/**
2026 * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
2027 * @hb: the hash_bucket futex_q was originally enqueued on
2028 * @q: the futex_q woken while waiting to be requeued
2029 * @key2: the futex_key of the requeue target futex
2030 * @timeout: the timeout associated with the wait (NULL if none)
2031 *
2032 * Detect if the task was woken on the initial futex as opposed to the requeue
2033 * target futex. If so, determine if it was a timeout or a signal that caused
2034 * the wakeup and return the appropriate error code to the caller. Must be
2035 * called with the hb lock held.
2036 *
2037 * Returns
2038 * 0 - no early wakeup detected
2039 * <0 - -ETIMEDOUT or -ERESTARTSYS (FIXME: or ERESTARTNOINTR?)
2040 */
2041static inline
2042int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
2043 struct futex_q *q, union futex_key *key2,
2044 struct hrtimer_sleeper *timeout)
2045{
2046 int ret = 0;
2047
2048 /*
2049 * With the hb lock held, we avoid races while we process the wakeup.
2050 * We only need to hold hb (and not hb2) to ensure atomicity as the
2051 * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
2052 * It can't be requeued from uaddr2 to something else since we don't
2053 * support a PI aware source futex for requeue.
2054 */
2055 if (!match_futex(&q->key, key2)) { /* key still differs from the target: we were never requeued */
2056 WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
2057 /*
2058 * We were woken prior to requeue by a timeout or a signal.
2059 * Unqueue the futex_q and determine which it was.
2060 */
2061 plist_del(&q->list, &q->list.plist);
2062 drop_futex_key_refs(&q->key);
2063
2064 if (timeout && !timeout->task) /* NOTE(review): presumably ->task is cleared when the sleeper's timer fires - confirm */
2065 ret = -ETIMEDOUT;
2066 else {
2067 /*
2068 * We expect signal_pending(current), but another
2069 * thread may have handled it for us already.
2070 */
2071 /* FIXME: ERESTARTSYS or ERESTARTNOINTR? Do we care if
2072 * the user specified SA_RESTART or not? */
2073 ret = -ERESTARTSYS;
2074 }
2075 }
2076 return ret;
2077}
2078
2079/**
2080 * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
2081 * @uaddr: the futex we initially wait on (non-pi)
2082 * @fshared: whether the futexes are shared (1) or not (0). They must be
2083 * the same type, no requeueing from private to shared, etc.
2084 * @val: the expected value of uaddr
2085 * @abs_time: absolute timeout
2086 * @bitset: 32 bit wakeup bitset set by userspace, defaults to all.
2087 * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
2088 * @uaddr2: the pi futex we will take prior to returning to user-space
2089 *
2090 * The caller will wait on uaddr and will be requeued by futex_requeue() to
2091 * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and
2092 * complete the acquisition of the rt_mutex prior to returning to userspace.
2093 * This ensures the rt_mutex maintains an owner when it has waiters; without
2094 * one, the pi logic wouldn't know which task to boost/deboost, if there was a
2095 * need to.
2096 *
2097 * We call schedule in futex_wait_queue_me() when we enqueue and return there
2098 * via the following:
2099 * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
2100 * 2) wakeup on uaddr2 after a requeue and subsequent unlock
2101 * 3) signal (before or after requeue)
2102 * 4) timeout (before or after requeue)
2103 *
2104 * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function.
2105 *
2106 * If 2, we may then block on trying to take the rt_mutex and return via:
2107 * 5) successful lock
2108 * 6) signal
2109 * 7) timeout
2110 * 8) other lock acquisition failure
2111 *
2112 * If 6, we setup a restart_block with futex_lock_pi() as the function.
2113 *
2114 * If 4 or 7, we cleanup and return with -ETIMEDOUT.
2115 *
2116 * Returns:
2117 * 0 - On success
2118 * <0 - On error
2119 */
2120static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
2121 u32 val, ktime_t *abs_time, u32 bitset,
2122 int clockrt, u32 __user *uaddr2)
2123{
2124 struct hrtimer_sleeper timeout, *to = NULL;
2125 struct rt_mutex_waiter rt_waiter;
2126 struct rt_mutex *pi_mutex = NULL;
2127 DECLARE_WAITQUEUE(wait, current);
2128 struct restart_block *restart;
2129 struct futex_hash_bucket *hb;
2130 union futex_key key2;
2131 struct futex_q q;
2132 int res, ret;
2133 u32 uval;
2134
2135 if (!bitset)
2136 return -EINVAL;
2137
2138 if (abs_time) {
2139 to = &timeout;
2140 hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
2141 CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
2142 hrtimer_init_sleeper(to, current);
2143 hrtimer_set_expires_range_ns(&to->timer, *abs_time,
2144 current->timer_slack_ns);
2145 }
2146
2147 /*
2148 * The waiter is allocated on our stack, manipulated by the requeue
2149 * code while we sleep on uaddr.
2150 */
2151 debug_rt_mutex_init_waiter(&rt_waiter);
2152 rt_waiter.task = NULL;
2153
2154 q.pi_state = NULL;
2155 q.bitset = bitset;
2156 q.rt_waiter = &rt_waiter;
2157
2158 key2 = FUTEX_KEY_INIT;
2159 ret = get_futex_key(uaddr2, fshared, &key2);
2160 if (unlikely(ret != 0))
2161 goto out;
2162
2163 /* Prepare to wait on uaddr. */
2164 ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
2165 if (ret) {
2166 put_futex_key(fshared, &key2);
2167 goto out;
2168 }
2169
2170 /* Queue the futex_q, drop the hb lock, wait for wakeup. */
2171 futex_wait_queue_me(hb, &q, to, &wait);
2172
2173 spin_lock(&hb->lock);
2174 ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
2175 spin_unlock(&hb->lock);
2176 if (ret)
2177 goto out_put_keys;
2178
2179 /*
2180 * In order for us to be here, we know our q.key == key2, and since
2181 * we took the hb->lock above, we also know that futex_requeue() has
2182 * completed and we no longer have to concern ourselves with a wakeup
2183 * race with the atomic proxy lock acquisition by the requeue code.
2184 */
2185
2186 /* Check if the requeue code acquired the second futex for us. */
2187 if (!q.rt_waiter) {
2188 /*
2189 * Got the lock. We might not be the anticipated owner if we
2190 * did a lock-steal - fix up the PI-state in that case.
2191 */
2192 if (q.pi_state && (q.pi_state->owner != current)) {
2193 spin_lock(q.lock_ptr);
2194 ret = fixup_pi_state_owner(uaddr2, &q, current,
2195 fshared);
2196 spin_unlock(q.lock_ptr);
2197 }
2198 } else {
2199 /*
2200 * We have been woken up by futex_unlock_pi(), a timeout, or a
2201 * signal. futex_unlock_pi() will not destroy the lock_ptr nor
2202 * the pi_state.
2203 */
2204 WARN_ON(!q.pi_state); /* test the pointer itself; the address of the member is never NULL */
2205 pi_mutex = &q.pi_state->pi_mutex;
2206 ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
2207 debug_rt_mutex_free_waiter(&rt_waiter);
2208
2209 spin_lock(q.lock_ptr);
2210 /*
2211 * Fixup the pi_state owner and possibly acquire the lock if we
2212 * haven't already.
2213 */
2214 res = fixup_owner(uaddr2, fshared, &q, !ret);
2215 /*
2216 * If fixup_owner() returned an error, propagate that. If it
2217 * acquired the lock, clear our -ETIMEDOUT or -EINTR.
2218 */
2219 if (res)
2220 ret = (res < 0) ? res : 0;
2221
2222 /* Unqueue and drop the lock. */
2223 unqueue_me_pi(&q);
2224 }
2225
2226 /*
2227 * If fixup_pi_state_owner() faulted and was unable to handle the
2228 * fault, unlock the rt_mutex and return the fault to userspace.
2229 */
2230 if (ret == -EFAULT) {
2231 if (pi_mutex && rt_mutex_owner(pi_mutex) == current) /* pi_mutex stays NULL on the atomic-acquisition path above */
2232 rt_mutex_unlock(pi_mutex);
2233 } else if (ret == -EINTR) {
2234 ret = -EFAULT;
2235 if (get_user(uval, uaddr2))
2236 goto out_put_keys;
2237
2238 /*
2239 * We've already been requeued, so restart by calling
2240 * futex_lock_pi() directly, rather than returning to this
2241 * function.
2242 */
2243 ret = -ERESTART_RESTARTBLOCK;
2244 restart = &current_thread_info()->restart_block;
2245 restart->fn = futex_lock_pi_restart;
2246 restart->futex.uaddr = (u32 *)uaddr2;
2247 restart->futex.val = uval;
2248 restart->futex.flags = 0;
2249 if (abs_time) {
2250 restart->futex.flags |= FLAGS_HAS_TIMEOUT;
2251 restart->futex.time = abs_time->tv64;
2252 }
2253
2254 if (fshared)
2255 restart->futex.flags |= FLAGS_SHARED;
2256 if (clockrt)
2257 restart->futex.flags |= FLAGS_CLOCKRT;
2258 }
2259
2260out_put_keys:
2261 put_futex_key(fshared, &q.key);
2262 put_futex_key(fshared, &key2);
2263
2264out:
2265 if (to) {
2266 hrtimer_cancel(&to->timer);
2267 destroy_hrtimer_on_stack(&to->timer);
2268 }
2269 return ret;
2270}
2271
1806/* 2272/*
1807 * Support for robust futexes: the kernel cleans up held futexes at 2273 * Support for robust futexes: the kernel cleans up held futexes at
1808 * thread exit time. 2274 * thread exit time.
@@ -2025,7 +2491,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2025 fshared = 1; 2491 fshared = 1;
2026 2492
2027 clockrt = op & FUTEX_CLOCK_REALTIME; 2493 clockrt = op & FUTEX_CLOCK_REALTIME;
2028 if (clockrt && cmd != FUTEX_WAIT_BITSET) 2494 if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
2029 return -ENOSYS; 2495 return -ENOSYS;
2030 2496
2031 switch (cmd) { 2497 switch (cmd) {
@@ -2040,10 +2506,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2040 ret = futex_wake(uaddr, fshared, val, val3); 2506 ret = futex_wake(uaddr, fshared, val, val3);
2041 break; 2507 break;
2042 case FUTEX_REQUEUE: 2508 case FUTEX_REQUEUE:
2043 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); 2509 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
2044 break; 2510 break;
2045 case FUTEX_CMP_REQUEUE: 2511 case FUTEX_CMP_REQUEUE:
2046 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3); 2512 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
2513 0);
2047 break; 2514 break;
2048 case FUTEX_WAKE_OP: 2515 case FUTEX_WAKE_OP:
2049 ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); 2516 ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
@@ -2060,6 +2527,18 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
2060 if (futex_cmpxchg_enabled) 2527 if (futex_cmpxchg_enabled)
2061 ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); 2528 ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
2062 break; 2529 break;
2530 case FUTEX_WAIT_REQUEUE_PI:
2531 val3 = FUTEX_BITSET_MATCH_ANY;
2532 ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3,
2533 clockrt, uaddr2);
2534 break;
2535 case FUTEX_REQUEUE_PI:
2536 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 1);
2537 break;
2538 case FUTEX_CMP_REQUEUE_PI:
2539 ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
2540 1);
2541 break;
2063 default: 2542 default:
2064 ret = -ENOSYS; 2543 ret = -ENOSYS;
2065 } 2544 }
@@ -2077,7 +2556,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
2077 int cmd = op & FUTEX_CMD_MASK; 2556 int cmd = op & FUTEX_CMD_MASK;
2078 2557
2079 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || 2558 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
2080 cmd == FUTEX_WAIT_BITSET)) { 2559 cmd == FUTEX_WAIT_BITSET ||
2560 cmd == FUTEX_WAIT_REQUEUE_PI)) {
2081 if (copy_from_user(&ts, utime, sizeof(ts)) != 0) 2561 if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
2082 return -EFAULT; 2562 return -EFAULT;
2083 if (!timespec_valid(&ts)) 2563 if (!timespec_valid(&ts))
@@ -2089,10 +2569,11 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
2089 tp = &t; 2569 tp = &t;
2090 } 2570 }
2091 /* 2571 /*
2092 * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. 2572 * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
2093 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. 2573 * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
2094 */ 2574 */
2095 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || 2575 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
2576 cmd == FUTEX_REQUEUE_PI || cmd == FUTEX_CMP_REQUEUE_PI ||
2096 cmd == FUTEX_WAKE_OP) 2577 cmd == FUTEX_WAKE_OP)
2097 val2 = (u32) (unsigned long) utime; 2578 val2 = (u32) (unsigned long) utime;
2098 2579