diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2009-05-05 13:21:40 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2009-05-15 09:24:18 -0400 |
commit | f1a11e0576c7a73d759d05d776692b2b2d37172b (patch) | |
tree | 3cb845c4cfd48cdbe0755d057c2698657fb965b5 | |
parent | b30505c81a9d4adea8b70ecff512b0216929b797 (diff) |
futex: remove the wait queue
The waitqueue which is used in struct futex_q is a leftover from the
futexfd implementation. There is no need to use a waitqueue at all, as
the waiting task is the only user of it. The waitqueue just adds
additional locking and a loop in the wake up path which both can be
avoided.
We have already a task reference in struct futex_q which is used for
PI futexes. Use it for normal futexes as well and just wake up the
task directly.
The logic of signalling the futex wakeup via setting q->lock_ptr to
NULL is kept with the difference that we set it NULL before doing the
wakeup. This opens an exit race window vs. a non futex wake up of the
to be woken up task, which we prevent with get_task_struct /
put_task_struct on the waiter.
[ Impact: simplification ]
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | kernel/futex.c | 58 |
1 files changed, 25 insertions, 33 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index aec8bf89bf4e..157bfcd725b8 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -100,8 +100,8 @@ struct futex_pi_state { | |||
100 | */ | 100 | */ |
101 | struct futex_q { | 101 | struct futex_q { |
102 | struct plist_node list; | 102 | struct plist_node list; |
103 | /* There can only be a single waiter */ | 103 | /* Waiter reference */ |
104 | wait_queue_head_t waiter; | 104 | struct task_struct *task; |
105 | 105 | ||
106 | /* Which hash list lock to use: */ | 106 | /* Which hash list lock to use: */ |
107 | spinlock_t *lock_ptr; | 107 | spinlock_t *lock_ptr; |
@@ -111,7 +111,6 @@ struct futex_q { | |||
111 | 111 | ||
112 | /* Optional priority inheritance state: */ | 112 | /* Optional priority inheritance state: */ |
113 | struct futex_pi_state *pi_state; | 113 | struct futex_pi_state *pi_state; |
114 | struct task_struct *task; | ||
115 | 114 | ||
116 | /* rt_waiter storage for requeue_pi: */ | 115 | /* rt_waiter storage for requeue_pi: */ |
117 | struct rt_mutex_waiter *rt_waiter; | 116 | struct rt_mutex_waiter *rt_waiter; |
@@ -694,22 +693,29 @@ retry: | |||
694 | */ | 693 | */ |
695 | static void wake_futex(struct futex_q *q) | 694 | static void wake_futex(struct futex_q *q) |
696 | { | 695 | { |
697 | plist_del(&q->list, &q->list.plist); | 696 | struct task_struct *p = q->task; |
697 | |||
698 | /* | 698 | /* |
699 | * The lock in wake_up_all() is a crucial memory barrier after the | 699 | * We set q->lock_ptr = NULL _before_ we wake up the task. If |
700 | * plist_del() and also before assigning to q->lock_ptr. | 700 | * a non futex wake up happens on another CPU then the task |
701 | * might exit and p would dereference a non existing task | ||
702 | * struct. Prevent this by holding a reference on p across the | ||
703 | * wake up. | ||
701 | */ | 704 | */ |
702 | wake_up(&q->waiter); | 705 | get_task_struct(p); |
706 | |||
707 | plist_del(&q->list, &q->list.plist); | ||
703 | /* | 708 | /* |
704 | * The waiting task can free the futex_q as soon as this is written, | 709 | * The waiting task can free the futex_q as soon as |
705 | * without taking any locks. This must come last. | 710 | * q->lock_ptr = NULL is written, without taking any locks. A |
706 | * | 711 | * memory barrier is required here to prevent the following |
707 | * A memory barrier is required here to prevent the following store to | 712 | * store to lock_ptr from getting ahead of the plist_del. |
708 | * lock_ptr from getting ahead of the wakeup. Clearing the lock at the | ||
709 | * end of wake_up() does not prevent this store from moving. | ||
710 | */ | 713 | */ |
711 | smp_wmb(); | 714 | smp_wmb(); |
712 | q->lock_ptr = NULL; | 715 | q->lock_ptr = NULL; |
716 | |||
717 | wake_up_state(p, TASK_NORMAL); | ||
718 | put_task_struct(p); | ||
713 | } | 719 | } |
714 | 720 | ||
715 | static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | 721 | static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) |
@@ -1003,7 +1009,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) | |||
1003 | WARN_ON(!q->rt_waiter); | 1009 | WARN_ON(!q->rt_waiter); |
1004 | q->rt_waiter = NULL; | 1010 | q->rt_waiter = NULL; |
1005 | 1011 | ||
1006 | wake_up(&q->waiter); | 1012 | wake_up_state(q->task, TASK_NORMAL); |
1007 | } | 1013 | } |
1008 | 1014 | ||
1009 | /** | 1015 | /** |
@@ -1280,8 +1286,6 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) | |||
1280 | { | 1286 | { |
1281 | struct futex_hash_bucket *hb; | 1287 | struct futex_hash_bucket *hb; |
1282 | 1288 | ||
1283 | init_waitqueue_head(&q->waiter); | ||
1284 | |||
1285 | get_futex_key_refs(&q->key); | 1289 | get_futex_key_refs(&q->key); |
1286 | hb = hash_futex(&q->key); | 1290 | hb = hash_futex(&q->key); |
1287 | q->lock_ptr = &hb->lock; | 1291 | q->lock_ptr = &hb->lock; |
@@ -1575,11 +1579,9 @@ out: | |||
1575 | * @hb: the futex hash bucket, must be locked by the caller | 1579 | * @hb: the futex hash bucket, must be locked by the caller |
1576 | * @q: the futex_q to queue up on | 1580 | * @q: the futex_q to queue up on |
1577 | * @timeout: the prepared hrtimer_sleeper, or null for no timeout | 1581 | * @timeout: the prepared hrtimer_sleeper, or null for no timeout |
1578 | * @wait: the wait_queue to add to the futex_q after queueing in the hb | ||
1579 | */ | 1582 | */ |
1580 | static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | 1583 | static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, |
1581 | struct hrtimer_sleeper *timeout, | 1584 | struct hrtimer_sleeper *timeout) |
1582 | wait_queue_t *wait) | ||
1583 | { | 1585 | { |
1584 | queue_me(q, hb); | 1586 | queue_me(q, hb); |
1585 | 1587 | ||
@@ -1587,19 +1589,11 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, | |||
1587 | * There might have been scheduling since the queue_me(), as we | 1589 | * There might have been scheduling since the queue_me(), as we |
1588 | * cannot hold a spinlock across the get_user() in case it | 1590 | * cannot hold a spinlock across the get_user() in case it |
1589 | * faults, and we cannot just set TASK_INTERRUPTIBLE state when | 1591 | * faults, and we cannot just set TASK_INTERRUPTIBLE state when |
1590 | * queueing ourselves into the futex hash. This code thus has to | 1592 | * queueing ourselves into the futex hash. This code thus has to |
1591 | * rely on the futex_wake() code removing us from hash when it | 1593 | * rely on the futex_wake() code removing us from hash when it |
1592 | * wakes us up. | 1594 | * wakes us up. |
1593 | */ | 1595 | */ |
1594 | 1596 | set_current_state(TASK_INTERRUPTIBLE); | |
1595 | /* add_wait_queue is the barrier after __set_current_state. */ | ||
1596 | __set_current_state(TASK_INTERRUPTIBLE); | ||
1597 | |||
1598 | /* | ||
1599 | * Add current as the futex_q waiter. We don't remove ourselves from | ||
1600 | * the wait_queue because we are the only user of it. | ||
1601 | */ | ||
1602 | add_wait_queue(&q->waiter, wait); | ||
1603 | 1597 | ||
1604 | /* Arm the timer */ | 1598 | /* Arm the timer */ |
1605 | if (timeout) { | 1599 | if (timeout) { |
@@ -1704,7 +1698,6 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
1704 | u32 val, ktime_t *abs_time, u32 bitset, int clockrt) | 1698 | u32 val, ktime_t *abs_time, u32 bitset, int clockrt) |
1705 | { | 1699 | { |
1706 | struct hrtimer_sleeper timeout, *to = NULL; | 1700 | struct hrtimer_sleeper timeout, *to = NULL; |
1707 | DECLARE_WAITQUEUE(wait, current); | ||
1708 | struct restart_block *restart; | 1701 | struct restart_block *restart; |
1709 | struct futex_hash_bucket *hb; | 1702 | struct futex_hash_bucket *hb; |
1710 | struct futex_q q; | 1703 | struct futex_q q; |
@@ -1733,7 +1726,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, | |||
1733 | goto out; | 1726 | goto out; |
1734 | 1727 | ||
1735 | /* queue_me and wait for wakeup, timeout, or a signal. */ | 1728 | /* queue_me and wait for wakeup, timeout, or a signal. */ |
1736 | futex_wait_queue_me(hb, &q, to, &wait); | 1729 | futex_wait_queue_me(hb, &q, to); |
1737 | 1730 | ||
1738 | /* If we were woken (and unqueued), we succeeded, whatever. */ | 1731 | /* If we were woken (and unqueued), we succeeded, whatever. */ |
1739 | ret = 0; | 1732 | ret = 0; |
@@ -2147,7 +2140,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2147 | struct hrtimer_sleeper timeout, *to = NULL; | 2140 | struct hrtimer_sleeper timeout, *to = NULL; |
2148 | struct rt_mutex_waiter rt_waiter; | 2141 | struct rt_mutex_waiter rt_waiter; |
2149 | struct rt_mutex *pi_mutex = NULL; | 2142 | struct rt_mutex *pi_mutex = NULL; |
2150 | DECLARE_WAITQUEUE(wait, current); | ||
2151 | struct restart_block *restart; | 2143 | struct restart_block *restart; |
2152 | struct futex_hash_bucket *hb; | 2144 | struct futex_hash_bucket *hb; |
2153 | union futex_key key2; | 2145 | union futex_key key2; |
@@ -2191,7 +2183,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, | |||
2191 | } | 2183 | } |
2192 | 2184 | ||
2193 | /* Queue the futex_q, drop the hb lock, wait for wakeup. */ | 2185 | /* Queue the futex_q, drop the hb lock, wait for wakeup. */ |
2194 | futex_wait_queue_me(hb, &q, to, &wait); | 2186 | futex_wait_queue_me(hb, &q, to); |
2195 | 2187 | ||
2196 | spin_lock(&hb->lock); | 2188 | spin_lock(&hb->lock); |
2197 | ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | 2189 | ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); |