aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDarren Hart <dvhltc@us.ibm.com>2009-03-12 03:55:37 -0400
committerIngo Molnar <mingo@elte.hu>2009-03-12 06:20:55 -0400
commitb2d0994b1301fc3a6a89e1889578dac9227840e3 (patch)
tree176a417a86d7072b014f1f872787eff7da2034de
parentebdcc81c71937b30e09110c02a1e8a21fa770b6f (diff)
futex: update futex commentary
Impact: cleanup The futex_hash_bucket can be a bit confusing when first looking at the code as it is a shared queue (and futex_q isn't a queue at all, but rather an element on the queue). The mmap_sem is no longer held outside of the futex_handle_fault() routine, yet numerous comments refer to it. The fshared argument is no an integer. I left some of these comments along as they are simply removed in future patches. Some of the commentary refering to futexes by virtual page mappings was not very clear, and completely accurate (as for shared futexes both the page and the offset are used to determine the key). For the purposes of the function description, just referring to "the futex" seems sufficient. With hashed futexes we now access the page after the hash-bucket is locked, and not only after it is enqueued. Signed-off-by: Darren Hart <dvhltc@us.ibm.com> Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Rusty Russell <rusty@rustcorp.com.au> LKML-Reference: <20090312075537.9856.29954.stgit@Aeon> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--kernel/futex.c33
1 files changed, 14 insertions, 19 deletions
diff --git a/kernel/futex.c b/kernel/futex.c
index 438701adce23..e6a4d72bca3d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -114,7 +114,9 @@ struct futex_q {
114}; 114};
115 115
116/* 116/*
117 * Split the global futex_lock into every hash list lock. 117 * Hash buckets are shared by all the futex_keys that hash to the same
118 * location. Each key may have multiple futex_q structures, one for each task
119 * waiting on a futex.
118 */ 120 */
119struct futex_hash_bucket { 121struct futex_hash_bucket {
120 spinlock_t lock; 122 spinlock_t lock;
@@ -189,8 +191,7 @@ static void drop_futex_key_refs(union futex_key *key)
189/** 191/**
190 * get_futex_key - Get parameters which are the keys for a futex. 192 * get_futex_key - Get parameters which are the keys for a futex.
191 * @uaddr: virtual address of the futex 193 * @uaddr: virtual address of the futex
192 * @shared: NULL for a PROCESS_PRIVATE futex, 194 * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
193 * &current->mm->mmap_sem for a PROCESS_SHARED futex
194 * @key: address where result is stored. 195 * @key: address where result is stored.
195 * 196 *
196 * Returns a negative error code or 0 197 * Returns a negative error code or 0
@@ -200,9 +201,7 @@ static void drop_futex_key_refs(union futex_key *key)
200 * offset_within_page). For private mappings, it's (uaddr, current->mm). 201 * offset_within_page). For private mappings, it's (uaddr, current->mm).
201 * We can usually work out the index without swapping in the page. 202 * We can usually work out the index without swapping in the page.
202 * 203 *
203 * fshared is NULL for PROCESS_PRIVATE futexes 204 * lock_page() might sleep, the caller should not hold a spinlock.
204 * For other futexes, it points to &current->mm->mmap_sem and
205 * caller must have taken the reader lock. but NOT any spinlocks.
206 */ 205 */
207static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) 206static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
208{ 207{
@@ -589,10 +588,9 @@ static void wake_futex(struct futex_q *q)
589 * The waiting task can free the futex_q as soon as this is written, 588 * The waiting task can free the futex_q as soon as this is written,
590 * without taking any locks. This must come last. 589 * without taking any locks. This must come last.
591 * 590 *
592 * A memory barrier is required here to prevent the following store 591 * A memory barrier is required here to prevent the following store to
593 * to lock_ptr from getting ahead of the wakeup. Clearing the lock 592 * lock_ptr from getting ahead of the wakeup. Clearing the lock at the
594 * at the end of wake_up_all() does not prevent this store from 593 * end of wake_up() does not prevent this store from moving.
595 * moving.
596 */ 594 */
597 smp_wmb(); 595 smp_wmb();
598 q->lock_ptr = NULL; 596 q->lock_ptr = NULL;
@@ -693,8 +691,7 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
693} 691}
694 692
695/* 693/*
696 * Wake up all waiters hashed on the physical page that is mapped 694 * Wake up waiters matching bitset queued on this futex (uaddr).
697 * to this virtual address:
698 */ 695 */
699static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) 696static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
700{ 697{
@@ -1076,11 +1073,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1076 * in the user space variable. This must be atomic as we have 1073 * in the user space variable. This must be atomic as we have
1077 * to preserve the owner died bit here. 1074 * to preserve the owner died bit here.
1078 * 1075 *
1079 * Note: We write the user space value _before_ changing the 1076 * Note: We write the user space value _before_ changing the pi_state
1080 * pi_state because we can fault here. Imagine swapped out 1077 * because we can fault here. Imagine swapped out pages or a fork
1081 * pages or a fork, which was running right before we acquired 1078 * that marked all the anonymous memory readonly for cow.
1082 * mmap_sem, that marked all the anonymous memory readonly for
1083 * cow.
1084 * 1079 *
1085 * Modifying pi_state _before_ the user space value would 1080 * Modifying pi_state _before_ the user space value would
1086 * leave the pi_state in an inconsistent state when we fault 1081 * leave the pi_state in an inconsistent state when we fault
@@ -1188,7 +1183,7 @@ retry:
1188 hb = queue_lock(&q); 1183 hb = queue_lock(&q);
1189 1184
1190 /* 1185 /*
1191 * Access the page AFTER the futex is queued. 1186 * Access the page AFTER the hash-bucket is locked.
1192 * Order is important: 1187 * Order is important:
1193 * 1188 *
1194 * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val); 1189 * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
@@ -1204,7 +1199,7 @@ retry:
1204 * a wakeup when *uaddr != val on entry to the syscall. This is 1199 * a wakeup when *uaddr != val on entry to the syscall. This is
1205 * rare, but normal. 1200 * rare, but normal.
1206 * 1201 *
1207 * for shared futexes, we hold the mmap semaphore, so the mapping 1202 * For shared futexes, we hold the mmap semaphore, so the mapping
1208 * cannot have changed since we looked it up in get_futex_key. 1203 * cannot have changed since we looked it up in get_futex_key.
1209 */ 1204 */
1210 ret = get_futex_value_locked(&uval, uaddr); 1205 ret = get_futex_value_locked(&uval, uaddr);