Diffstat (limited to 'kernel/futex.c')
 kernel/futex.c | 127 +++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 95 insertions(+), 32 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 44a1261cb9ff..5f589279e462 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -70,7 +70,10 @@
 #include "locking/rtmutex_common.h"
 
 /*
- * Basic futex operation and ordering guarantees:
+ * READ this before attempting to hack on futexes!
+ *
+ * Basic futex operation and ordering guarantees
+ * =============================================
  *
  * The waiter reads the futex value in user space and calls
  * futex_wait(). This function computes the hash bucket and acquires
@@ -119,7 +122,7 @@
  * sys_futex(WAIT, futex, val);
  *   futex_wait(futex, val);
  *
- *   waiters++;
+ *   waiters++; (a)
  *   mb(); (A) <-- paired with -.
  *                              |
  *   lock(hash_bucket(futex));  |
@@ -135,14 +138,14 @@
  *     unlock(hash_bucket(futex));
  *     schedule();                         if (waiters)
  *                                           lock(hash_bucket(futex));
- *                                           wake_waiters(futex);
- *                                           unlock(hash_bucket(futex));
+ *   else                                    wake_waiters(futex);
+ *     waiters--; (b)                        unlock(hash_bucket(futex));
  *
- * Where (A) orders the waiters increment and the futex value read -- this
- * is guaranteed by the head counter in the hb spinlock; and where (B)
- * orders the write to futex and the waiters read -- this is done by the
- * barriers in get_futex_key_refs(), through either ihold or atomic_inc,
- * depending on the futex type.
+ * Where (A) orders the waiters increment and the futex value read through
+ * atomic operations (see hb_waiters_inc) and where (B) orders the write
+ * to futex and the waiters read -- this is done by the barriers in
+ * get_futex_key_refs(), through either ihold or atomic_inc, depending on the
+ * futex type.
  *
  * This yields the following case (where X:=waiters, Y:=futex):
  *
@@ -155,9 +158,22 @@
  * Which guarantees that x==0 && y==0 is impossible; which translates back into
  * the guarantee that we cannot both miss the futex variable change and the
  * enqueue.
+ *
+ * Note that a new waiter is accounted for in (a) even when it is possible that
+ * the wait call can return an error, in which case we backtrack from it in (b).
+ * Refer to the comment in queue_lock().
+ *
+ * Similarly, in order to account for waiters being requeued to another
+ * address we always increment the waiters for the destination bucket before
+ * acquiring the lock and decrement them again after releasing it; the code
+ * that actually moves the futex(es) between hash buckets (requeue_futex)
+ * does the additional required waiter count housekeeping. The inc/dec pair
+ * brackets double_lock_hb() and double_unlock_hb(), respectively.
  */
 
+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
 int __read_mostly futex_cmpxchg_enabled;
+#endif
 
 /*
  * Futex flags used to encode options to functions and preserve them across
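
The store-buffering argument in the comment above can be exercised outside the kernel. Below is a minimal userspace analogue of the X:=waiters, Y:=futex case; it is not part of this patch, C11 seq_cst fences stand in for the mb() barriers (A) and (B), and every name in it is illustrative. The assert encodes the forbidden x==0 && y==0 outcome.

/* sb_litmus.c - userspace analogue; build: gcc -O2 -pthread sb_litmus.c */
#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>

static atomic_int waiters;      /* stands in for hb->waiters          */
static atomic_int futex_word;   /* stands in for the user-space futex */
static int x = -1, y = -1;

static void *waiter(void *arg)
{
        (void)arg;
        atomic_fetch_add_explicit(&waiters, 1, memory_order_relaxed); /* waiters++ (a) */
        atomic_thread_fence(memory_order_seq_cst);                    /* mb(); (A)     */
        x = atomic_load_explicit(&futex_word, memory_order_relaxed);  /* uval = *futex */
        return NULL;
}

static void *waker(void *arg)
{
        (void)arg;
        atomic_store_explicit(&futex_word, 1, memory_order_relaxed);  /* *futex = newval */
        atomic_thread_fence(memory_order_seq_cst);                    /* mb(); (B)       */
        y = atomic_load_explicit(&waiters, memory_order_relaxed);     /* if (waiters)    */
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, waiter, NULL);
        pthread_create(&b, NULL, waker, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);

        /* The outcome the comment rules out: both sides miss each other. */
        assert(!(x == 0 && y == 0));
        return 0;
}

A single run of course cannot prove the impossibility; repeated stress runs or a litmus tool such as herd7 are the usual way to check this kind of pattern.
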
@@ -234,6 +250,7 @@ static const struct futex_q futex_q_init = {
  * waiting on a futex.
  */
 struct futex_hash_bucket {
+        atomic_t waiters;
         spinlock_t lock;
         struct plist_head chain;
 } ____cacheline_aligned_in_smp;
@@ -253,22 +270,37 @@ static inline void futex_get_mm(union futex_key *key)
         smp_mb__after_atomic_inc();
 }
 
-static inline bool hb_waiters_pending(struct futex_hash_bucket *hb)
+/*
+ * Reflects a new waiter being added to the waitqueue.
+ */
+static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
 {
 #ifdef CONFIG_SMP
+        atomic_inc(&hb->waiters);
         /*
-         * Tasks trying to enter the critical region are most likely
-         * potential waiters that will be added to the plist. Ensure
-         * that wakers won't miss to-be-slept tasks in the window between
-         * the wait call and the actual plist_add.
+         * Full barrier (A), see the ordering comment above.
          */
-        if (spin_is_locked(&hb->lock))
-                return true;
-        smp_rmb(); /* Make sure we check the lock state first */
+        smp_mb__after_atomic_inc();
+#endif
+}
 
-        return !plist_head_empty(&hb->chain);
+/*
+ * Reflects a waiter being removed from the waitqueue by wakeup
+ * paths.
+ */
+static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
+{
+#ifdef CONFIG_SMP
+        atomic_dec(&hb->waiters);
+#endif
+}
+
+static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
+{
+#ifdef CONFIG_SMP
+        return atomic_read(&hb->waiters);
 #else
-        return true;
+        return 1;
 #endif
 }
 
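
The consumer of hb_waiters_pending() is the wake side, which this hunk does not show: when the counter reads zero the waker can skip taking hb->lock altogether. A hedged, kernel-style sketch of that fast path follows; futex_wake() in this tree does something along these lines, but the function name below is made up for illustration.

static int futex_wake_sketch(struct futex_hash_bucket *hb)
{
        /* Barrier (B) was already issued, per the ordering comment above. */
        if (!hb_waiters_pending(hb))
                return 0;               /* nobody queued or about to queue */

        spin_lock(&hb->lock);
        /* ... walk hb->chain and wake the matching futex_q entries ... */
        spin_unlock(&hb->lock);
        return 1;
}
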
@@ -954,6 +986,7 @@ static void __unqueue_futex(struct futex_q *q)
 
         hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
         plist_del(&q->list, &hb->chain);
+        hb_waiters_dec(hb);
 }
 
 /*
@@ -1257,7 +1290,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
          */
         if (likely(&hb1->chain != &hb2->chain)) {
                 plist_del(&q->list, &hb1->chain);
+                hb_waiters_dec(hb1);
                 plist_add(&q->list, &hb2->chain);
+                hb_waiters_inc(hb2);
                 q->lock_ptr = &hb2->lock;
         }
         get_futex_key_refs(key2);
@@ -1431,6 +1466,7 @@ retry:
         hb2 = hash_futex(&key2);
 
 retry_private:
+        hb_waiters_inc(hb2);
         double_lock_hb(hb1, hb2);
 
         if (likely(cmpval != NULL)) {
@@ -1440,6 +1476,7 @@ retry_private:
 
                 if (unlikely(ret)) {
                         double_unlock_hb(hb1, hb2);
+                        hb_waiters_dec(hb2);
 
                         ret = get_user(curval, uaddr1);
                         if (ret)
@@ -1489,6 +1526,7 @@ retry_private:
                         break;
                 case -EFAULT:
                         double_unlock_hb(hb1, hb2);
+                        hb_waiters_dec(hb2);
                         put_futex_key(&key2);
                         put_futex_key(&key1);
                         ret = fault_in_user_writeable(uaddr2);
@@ -1498,6 +1536,7 @@ retry_private:
                 case -EAGAIN:
                         /* The owner was exiting, try again. */
                         double_unlock_hb(hb1, hb2);
+                        hb_waiters_dec(hb2);
                         put_futex_key(&key2);
                         put_futex_key(&key1);
                         cond_resched();
@@ -1573,6 +1612,7 @@ retry_private:
 
 out_unlock:
         double_unlock_hb(hb1, hb2);
+        hb_waiters_dec(hb2);
 
         /*
          * drop_futex_key_refs() must be called outside the spinlocks. During
@@ -1600,6 +1640,17 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
         struct futex_hash_bucket *hb;
 
         hb = hash_futex(&q->key);
+
+        /*
+         * Increment the counter before taking the lock so that
+         * a potential waker won't miss a to-be-slept task that is
+         * waiting for the spinlock. This is safe as all queue_lock()
+         * users end up calling queue_me(). Similarly, for housekeeping,
+         * decrement the counter at queue_unlock() when some error has
+         * occurred and we don't end up adding the task to the list.
+         */
+        hb_waiters_inc(hb);
+
         q->lock_ptr = &hb->lock;
 
         spin_lock(&hb->lock); /* implies MB (A) */
@@ -1611,6 +1662,7 @@ queue_unlock(struct futex_hash_bucket *hb)
         __releases(&hb->lock)
 {
         spin_unlock(&hb->lock);
+        hb_waiters_dec(hb);
 }
 
 /**
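
What makes the early increment in queue_lock() safe is a calling convention that these hunks only hint at: every queue_lock() is followed either by queue_me() (the task really queues, and the counter is dropped later by __unqueue_futex() or a wakeup path) or by queue_unlock() on an error path, which drops it immediately. A hedged sketch of that waiter-side shape, with an illustrative function name:

static int futex_wait_setup_sketch(u32 __user *uaddr, u32 val,
                                   struct futex_q *q, struct futex_hash_bucket **hb)
{
        u32 uval;
        int ret;

        *hb = queue_lock(q);            /* hb_waiters_inc() + spin_lock()         */

        ret = get_futex_value_locked(&uval, uaddr);
        if (ret || uval != val) {
                queue_unlock(*hb);      /* error path: hb_waiters_dec() + unlock  */
                return ret ? ret : -EWOULDBLOCK;
        }

        return 0;                       /* caller goes on to queue_me() and sleep */
}
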
@@ -2342,6 +2394,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
          * Unqueue the futex_q and determine which it was.
          */
         plist_del(&q->list, &hb->chain);
+        hb_waiters_dec(hb);
 
         /* Handle spurious wakeups gracefully */
         ret = -EWOULDBLOCK;
@@ -2843,9 +2896,28 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
         return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
 }
 
-static int __init futex_init(void)
+static void __init futex_detect_cmpxchg(void)
 {
+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
         u32 curval;
+
+        /*
+         * This will fail and we want it. Some arch implementations do
+         * runtime detection of the futex_atomic_cmpxchg_inatomic()
+         * functionality. We want to know that before we call in any
+         * of the complex code paths. Also we want to prevent
+         * registration of robust lists in that case. NULL is
+         * guaranteed to fault and we get -EFAULT on functional
+         * implementation, the non-functional ones will return
+         * -ENOSYS.
+         */
+        if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
+                futex_cmpxchg_enabled = 1;
+#endif
+}
+
+static int __init futex_init(void)
+{
         unsigned int futex_shift;
         unsigned long i;
 
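
For context on why the detection result matters, here is a hedged illustration (not from this diff) of how the flag is consumed elsewhere: operations that need the atomic cmpxchg, such as the PI operations and robust-list registration, are expected to bail out with -ENOSYS when futex_cmpxchg_enabled stays 0. The function name is illustrative.

static long futex_cmpxchg_gate_sketch(void)
{
        if (!futex_cmpxchg_enabled)     /* detection above never flipped it to 1 */
                return -ENOSYS;
        /* ... proceed with FUTEX_LOCK_PI, FUTEX_UNLOCK_PI, robust lists, ... */
        return 0;
}
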
@@ -2861,20 +2933,11 @@ static int __init futex_init(void)
                                                &futex_shift, NULL,
                                                futex_hashsize, futex_hashsize);
         futex_hashsize = 1UL << futex_shift;
-        /*
-         * This will fail and we want it. Some arch implementations do
-         * runtime detection of the futex_atomic_cmpxchg_inatomic()
-         * functionality. We want to know that before we call in any
-         * of the complex code paths. Also we want to prevent
-         * registration of robust lists in that case. NULL is
-         * guaranteed to fault and we get -EFAULT on functional
-         * implementation, the non-functional ones will return
-         * -ENOSYS.
-         */
-        if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
-                futex_cmpxchg_enabled = 1;
+
+        futex_detect_cmpxchg();
 
         for (i = 0; i < futex_hashsize; i++) {
+                atomic_set(&futex_queues[i].waiters, 0);
                 plist_head_init(&futex_queues[i].chain);
                 spin_lock_init(&futex_queues[i].lock);
         }