diff options
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- | kernel/futex.c | 76 |
1 files changed, 53 insertions, 23 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index 794c862125f..248dd119a86 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -115,6 +115,9 @@ struct futex_q { | |||
115 | /* rt_waiter storage for requeue_pi: */ | 115 | /* rt_waiter storage for requeue_pi: */ |
116 | struct rt_mutex_waiter *rt_waiter; | 116 | struct rt_mutex_waiter *rt_waiter; |
117 | 117 | ||
118 | /* The expected requeue pi target futex key: */ | ||
119 | union futex_key *requeue_pi_key; | ||
120 | |||
118 | /* Bitset for the optional bitmasked wakeup */ | 121 | /* Bitset for the optional bitmasked wakeup */ |
119 | u32 bitset; | 122 | u32 bitset; |
120 | }; | 123 | }; |
@@ -247,6 +250,7 @@ again: | |||
247 | if (err < 0) | 250 | if (err < 0) |
248 | return err; | 251 | return err; |
249 | 252 | ||
253 | page = compound_head(page); | ||
250 | lock_page(page); | 254 | lock_page(page); |
251 | if (!page->mapping) { | 255 | if (!page->mapping) { |
252 | unlock_page(page); | 256 | unlock_page(page); |
@@ -1009,15 +1013,19 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, | |||
1009 | * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue | 1013 | * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue |
1010 | * q: the futex_q | 1014 | * q: the futex_q |
1011 | * key: the key of the requeue target futex | 1015 | * key: the key of the requeue target futex |
1016 | * hb: the hash_bucket of the requeue target futex | ||
1012 | * | 1017 | * |
1013 | * During futex_requeue, with requeue_pi=1, it is possible to acquire the | 1018 | * During futex_requeue, with requeue_pi=1, it is possible to acquire the |
1014 | * target futex if it is uncontended or via a lock steal. Set the futex_q key | 1019 | * target futex if it is uncontended or via a lock steal. Set the futex_q key |
1015 | * to the requeue target futex so the waiter can detect the wakeup on the right | 1020 | * to the requeue target futex so the waiter can detect the wakeup on the right |
1016 | * futex, but remove it from the hb and NULL the rt_waiter so it can detect | 1021 | * futex, but remove it from the hb and NULL the rt_waiter so it can detect |
1017 | * atomic lock acquisition. Must be called with the q->lock_ptr held. | 1022 | * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock |
1023 | * to protect access to the pi_state to fixup the owner later. Must be called | ||
1024 | * with both q->lock_ptr and hb->lock held. | ||
1018 | */ | 1025 | */ |
1019 | static inline | 1026 | static inline |
1020 | void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) | 1027 | void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, |
1028 | struct futex_hash_bucket *hb) | ||
1021 | { | 1029 | { |
1022 | drop_futex_key_refs(&q->key); | 1030 | drop_futex_key_refs(&q->key); |
1023 | get_futex_key_refs(key); | 1031 | get_futex_key_refs(key); |
@@ -1029,6 +1037,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) | |||
1029 | WARN_ON(!q->rt_waiter); | 1037 | WARN_ON(!q->rt_waiter); |
1030 | q->rt_waiter = NULL; | 1038 | q->rt_waiter = NULL; |
1031 | 1039 | ||
1040 | q->lock_ptr = &hb->lock; | ||
1041 | #ifdef CONFIG_DEBUG_PI_LIST | ||
1042 | q->list.plist.lock = &hb->lock; | ||
1043 | #endif | ||
1044 | |||
1032 | wake_up_state(q->task, TASK_NORMAL); | 1045 | wake_up_state(q->task, TASK_NORMAL); |
1033 | } | 1046 | } |
1034 | 1047 | ||
@@ -1079,6 +1092,10 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | |||
1079 | if (!top_waiter) | 1092 | if (!top_waiter) |
1080 | return 0; | 1093 | return 0; |
1081 | 1094 | ||
1095 | /* Ensure we requeue to the expected futex. */ | ||
1096 | if (!match_futex(top_waiter->requeue_pi_key, key2)) | ||
1097 | return -EINVAL; | ||
1098 | |||
1082 | /* | 1099 | /* |
1083 | * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in | 1100 | * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in |
1084 | * the contended case or if set_waiters is 1. The pi_state is returned | 1101 | * the contended case or if set_waiters is 1. The pi_state is returned |
@@ -1087,7 +1104,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, | |||
1087 | ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, | 1104 | ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, |
1088 | set_waiters); | 1105 | set_waiters); |
1089 | if (ret == 1) | 1106 | if (ret == 1) |
1090 | requeue_pi_wake_futex(top_waiter, key2); | 1107 | requeue_pi_wake_futex(top_waiter, key2, hb2); |
1091 | 1108 | ||
1092 | return ret; | 1109 | return ret; |
1093 | } | 1110 | } |
@@ -1246,8 +1263,15 @@ retry_private: | |||
1246 | if (!match_futex(&this->key, &key1)) | 1263 | if (!match_futex(&this->key, &key1)) |
1247 | continue; | 1264 | continue; |
1248 | 1265 | ||
1249 | WARN_ON(!requeue_pi && this->rt_waiter); | 1266 | /* |
1250 | WARN_ON(requeue_pi && !this->rt_waiter); | 1267 | * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always |
1268 | * be paired with each other and no other futex ops. | ||
1269 | */ | ||
1270 | if ((requeue_pi && !this->rt_waiter) || | ||
1271 | (!requeue_pi && this->rt_waiter)) { | ||
1272 | ret = -EINVAL; | ||
1273 | break; | ||
1274 | } | ||
1251 | 1275 | ||
1252 | /* | 1276 | /* |
1253 | * Wake nr_wake waiters. For requeue_pi, if we acquired the | 1277 | * Wake nr_wake waiters. For requeue_pi, if we acquired the |
@@ -1259,6 +1283,12 @@ retry_private: | |||
1259 | continue; | 1283 | continue; |
1260 | } | 1284 | } |
1261 | 1285 | ||
1286 | /* Ensure we requeue to the expected futex for requeue_pi. */ | ||
1287 | if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) { | ||
1288 | ret = -EINVAL; | ||
1289 | break; | ||
1290 | } | ||
1291 | |||
1262 | /* | 1292 | /* |
1263 | * Requeue nr_requeue waiters and possibly one more in the case | 1293 | * Requeue nr_requeue waiters and possibly one more in the case |
1264 | * of requeue_pi if we couldn't acquire the lock atomically. | 1294 | * of requeue_pi if we couldn't acquire the lock atomically. |
@@ -1272,7 +1302,7 @@ retry_private: | |||
1272 | this->task, 1); | 1302 | this->task, 1); |
1273 | if (ret == 1) { | 1303 | if (ret == 1) { |
1274 | /* We got the lock. */ | 1304 | /* We got the lock. */ |
1275 | requeue_pi_wake_futex(this, &key2); | 1305 | requeue_pi_wake_futex(this, &key2, hb2); |
1276 | continue; | 1306 | continue; |
1277 | } else if (ret) { | 1307 | } else if (ret) { |
1278 | /* -EDEADLK */ | 1308 | /* -EDEADLK */ |
@@ -1734,6 +1764,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
1734 | q.pi_state = NULL; | 1764 | q.pi_state = NULL; |
1735 | q.bitset = bitset; | 1765 | q.bitset = bitset; |
1736 | q.rt_waiter = NULL; | 1766 | q.rt_waiter = NULL; |
1767 | q.requeue_pi_key = NULL; | ||
1737 | 1768 | ||
1738 | if (abs_time) { | 1769 | if (abs_time) { |
1739 | to = &timeout; | 1770 | to = &timeout; |
@@ -1841,6 +1872,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, | |||
1841 | 1872 | ||
1842 | q.pi_state = NULL; | 1873 | q.pi_state = NULL; |
1843 | q.rt_waiter = NULL; | 1874 | q.rt_waiter = NULL; |
1875 | q.requeue_pi_key = NULL; | ||
1844 | retry: | 1876 | retry: |
1845 | q.key = FUTEX_KEY_INIT; | 1877 | q.key = FUTEX_KEY_INIT; |
1846 | ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); | 1878 | ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); |
@@ -2101,11 +2133,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
2101 | * We call schedule in futex_wait_queue_me() when we enqueue and return there | 2133 | * We call schedule in futex_wait_queue_me() when we enqueue and return there |
2102 | * via the following: | 2134 | * via the following: |
2103 | * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() | 2135 | * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() |
2104 | * 2) wakeup on uaddr2 after a requeue and subsequent unlock | 2136 | * 2) wakeup on uaddr2 after a requeue |
2105 | * 3) signal (before or after requeue) | 2137 | * 3) signal |
2106 | * 4) timeout (before or after requeue) | 2138 | * 4) timeout |
2107 | * | 2139 | * |
2108 | * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function. | 2140 | * If 3, cleanup and return -ERESTARTNOINTR. |
2109 | * | 2141 | * |
2110 | * If 2, we may then block on trying to take the rt_mutex and return via: | 2142 | * If 2, we may then block on trying to take the rt_mutex and return via: |
2111 | * 5) successful lock | 2143 | * 5) successful lock |
@@ -2113,7 +2145,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, | |||
2113 | * 7) timeout | 2145 | * 7) timeout |
2114 | * 8) other lock acquisition failure | 2146 | * 8) other lock acquisition failure |
2115 | * | 2147 | * |
2116 | * If 6, we setup a restart_block with futex_lock_pi() as the function. | 2148 | * If 6, return -EWOULDBLOCK (restarting the syscall would do the same). |
2117 | * | 2149 | * |
2118 | * If 4 or 7, we cleanup and return with -ETIMEDOUT. | 2150 | * If 4 or 7, we cleanup and return with -ETIMEDOUT. |
2119 | * | 2151 | * |
@@ -2152,15 +2184,16 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2152 | debug_rt_mutex_init_waiter(&rt_waiter); | 2184 | debug_rt_mutex_init_waiter(&rt_waiter); |
2153 | rt_waiter.task = NULL; | 2185 | rt_waiter.task = NULL; |
2154 | 2186 | ||
2155 | q.pi_state = NULL; | ||
2156 | q.bitset = bitset; | ||
2157 | q.rt_waiter = &rt_waiter; | ||
2158 | |||
2159 | key2 = FUTEX_KEY_INIT; | 2187 | key2 = FUTEX_KEY_INIT; |
2160 | ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); | 2188 | ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); |
2161 | if (unlikely(ret != 0)) | 2189 | if (unlikely(ret != 0)) |
2162 | goto out; | 2190 | goto out; |
2163 | 2191 | ||
2192 | q.pi_state = NULL; | ||
2193 | q.bitset = bitset; | ||
2194 | q.rt_waiter = &rt_waiter; | ||
2195 | q.requeue_pi_key = &key2; | ||
2196 | |||
2164 | /* Prepare to wait on uaddr. */ | 2197 | /* Prepare to wait on uaddr. */ |
2165 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); | 2198 | ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); |
2166 | if (ret) | 2199 | if (ret) |
@@ -2231,14 +2264,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2231 | rt_mutex_unlock(pi_mutex); | 2264 | rt_mutex_unlock(pi_mutex); |
2232 | } else if (ret == -EINTR) { | 2265 | } else if (ret == -EINTR) { |
2233 | /* | 2266 | /* |
2234 | * We've already been requeued, but we have no way to | 2267 | * We've already been requeued, but cannot restart by calling |
2235 | * restart by calling futex_lock_pi() directly. We | 2268 | * futex_lock_pi() directly. We could restart this syscall, but |
2236 | * could restart the syscall, but that will look at | 2269 | * it would detect that the user space "val" changed and return |
2237 | * the user space value and return right away. So we | 2270 | * -EWOULDBLOCK. Save the overhead of the restart and return |
2238 | * drop back with EWOULDBLOCK to tell user space that | 2271 | * -EWOULDBLOCK directly. |
2239 | * "val" has been changed. That's the same what the | ||
2240 | * restart of the syscall would do in | ||
2241 | * futex_wait_setup(). | ||
2242 | */ | 2272 | */ |
2243 | ret = -EWOULDBLOCK; | 2273 | ret = -EWOULDBLOCK; |
2244 | } | 2274 | } |