diff options
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- | kernel/futex.c | 37 |
1 files changed, 28 insertions, 9 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index 67dacaf93e56..5f589279e462 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -70,7 +70,10 @@ | |||
70 | #include "locking/rtmutex_common.h" | 70 | #include "locking/rtmutex_common.h" |
71 | 71 | ||
72 | /* | 72 | /* |
73 | * Basic futex operation and ordering guarantees: | 73 | * READ this before attempting to hack on futexes! |
74 | * | ||
75 | * Basic futex operation and ordering guarantees | ||
76 | * ============================================= | ||
74 | * | 77 | * |
75 | * The waiter reads the futex value in user space and calls | 78 | * The waiter reads the futex value in user space and calls |
76 | * futex_wait(). This function computes the hash bucket and acquires | 79 | * futex_wait(). This function computes the hash bucket and acquires |
@@ -119,7 +122,7 @@ | |||
119 | * sys_futex(WAIT, futex, val); | 122 | * sys_futex(WAIT, futex, val); |
120 | * futex_wait(futex, val); | 123 | * futex_wait(futex, val); |
121 | * | 124 | * |
122 | * waiters++; | 125 | * waiters++; (a) |
123 | * mb(); (A) <-- paired with -. | 126 | * mb(); (A) <-- paired with -. |
124 | * | | 127 | * | |
125 | * lock(hash_bucket(futex)); | | 128 | * lock(hash_bucket(futex)); | |
@@ -135,14 +138,14 @@ | |||
135 | * unlock(hash_bucket(futex)); | 138 | * unlock(hash_bucket(futex)); |
136 | * schedule(); if (waiters) | 139 | * schedule(); if (waiters) |
137 | * lock(hash_bucket(futex)); | 140 | * lock(hash_bucket(futex)); |
138 | * wake_waiters(futex); | 141 | * else wake_waiters(futex); |
139 | * unlock(hash_bucket(futex)); | 142 | * waiters--; (b) unlock(hash_bucket(futex)); |
140 | * | 143 | * |
141 | * Where (A) orders the waiters increment and the futex value read -- this | 144 | * Where (A) orders the waiters increment and the futex value read through |
142 | * is guaranteed by the head counter in the hb spinlock; and where (B) | 145 | * atomic operations (see hb_waiters_inc) and where (B) orders the write |
143 | * orders the write to futex and the waiters read -- this is done by the | 146 | * to futex and the waiters read -- this is done by the barriers in |
144 | * barriers in get_futex_key_refs(), through either ihold or atomic_inc, | 147 | * get_futex_key_refs(), through either ihold or atomic_inc, depending on the |
145 | * depending on the futex type. | 148 | * futex type. |
146 | * | 149 | * |
147 | * This yields the following case (where X:=waiters, Y:=futex): | 150 | * This yields the following case (where X:=waiters, Y:=futex): |
148 | * | 151 | * |
@@ -155,6 +158,17 @@ | |||
155 | * Which guarantees that x==0 && y==0 is impossible; which translates back into | 158 | * Which guarantees that x==0 && y==0 is impossible; which translates back into |
156 | * the guarantee that we cannot both miss the futex variable change and the | 159 | * the guarantee that we cannot both miss the futex variable change and the |
157 | * enqueue. | 160 | * enqueue. |
161 | * | ||
162 | * Note that a new waiter is accounted for in (a) even when it is possible that | ||
163 | * the wait call can return error, in which case we backtrack from it in (b). | ||
164 | * Refer to the comment in queue_lock(). | ||
165 | * | ||
166 | * Similarly, in order to account for waiters being requeued on another | ||
167 | * address we always increment the waiters for the destination bucket before | ||
168 | * acquiring the lock. It then decrements them again after releasing it - | ||
169 | * the code that actually moves the futex(es) between hash buckets (requeue_futex) | ||
170 | * will do the additional required waiter count housekeeping. This is done for | ||
171 | * double_lock_hb() and double_unlock_hb(), respectively. | ||
158 | */ | 172 | */ |
159 | 173 | ||
160 | #ifndef CONFIG_HAVE_FUTEX_CMPXCHG | 174 | #ifndef CONFIG_HAVE_FUTEX_CMPXCHG |
@@ -1452,6 +1466,7 @@ retry: | |||
1452 | hb2 = hash_futex(&key2); | 1466 | hb2 = hash_futex(&key2); |
1453 | 1467 | ||
1454 | retry_private: | 1468 | retry_private: |
1469 | hb_waiters_inc(hb2); | ||
1455 | double_lock_hb(hb1, hb2); | 1470 | double_lock_hb(hb1, hb2); |
1456 | 1471 | ||
1457 | if (likely(cmpval != NULL)) { | 1472 | if (likely(cmpval != NULL)) { |
@@ -1461,6 +1476,7 @@ retry_private: | |||
1461 | 1476 | ||
1462 | if (unlikely(ret)) { | 1477 | if (unlikely(ret)) { |
1463 | double_unlock_hb(hb1, hb2); | 1478 | double_unlock_hb(hb1, hb2); |
1479 | hb_waiters_dec(hb2); | ||
1464 | 1480 | ||
1465 | ret = get_user(curval, uaddr1); | 1481 | ret = get_user(curval, uaddr1); |
1466 | if (ret) | 1482 | if (ret) |
@@ -1510,6 +1526,7 @@ retry_private: | |||
1510 | break; | 1526 | break; |
1511 | case -EFAULT: | 1527 | case -EFAULT: |
1512 | double_unlock_hb(hb1, hb2); | 1528 | double_unlock_hb(hb1, hb2); |
1529 | hb_waiters_dec(hb2); | ||
1513 | put_futex_key(&key2); | 1530 | put_futex_key(&key2); |
1514 | put_futex_key(&key1); | 1531 | put_futex_key(&key1); |
1515 | ret = fault_in_user_writeable(uaddr2); | 1532 | ret = fault_in_user_writeable(uaddr2); |
@@ -1519,6 +1536,7 @@ retry_private: | |||
1519 | case -EAGAIN: | 1536 | case -EAGAIN: |
1520 | /* The owner was exiting, try again. */ | 1537 | /* The owner was exiting, try again. */ |
1521 | double_unlock_hb(hb1, hb2); | 1538 | double_unlock_hb(hb1, hb2); |
1539 | hb_waiters_dec(hb2); | ||
1522 | put_futex_key(&key2); | 1540 | put_futex_key(&key2); |
1523 | put_futex_key(&key1); | 1541 | put_futex_key(&key1); |
1524 | cond_resched(); | 1542 | cond_resched(); |
@@ -1594,6 +1612,7 @@ retry_private: | |||
1594 | 1612 | ||
1595 | out_unlock: | 1613 | out_unlock: |
1596 | double_unlock_hb(hb1, hb2); | 1614 | double_unlock_hb(hb1, hb2); |
1615 | hb_waiters_dec(hb2); | ||
1597 | 1616 | ||
1598 | /* | 1617 | /* |
1599 | * drop_futex_key_refs() must be called outside the spinlocks. During | 1618 | * drop_futex_key_refs() must be called outside the spinlocks. During |