author     Peter Zijlstra <peterz@infradead.org>    2017-03-22 06:35:58 -0400
committer  Thomas Gleixner <tglx@linutronix.de>     2017-03-23 14:10:09 -0400
commit     cfafcd117da0216520568c195cb2f6cd1980c4bb
tree       cce98f12a6bfa27515fb1cabc5bbd6fd55a8459f /kernel/futex.c
parent     38d589f2fd08f1296aea3ce62bebd185125c6d81
futex: Rework futex_lock_pi() to use rt_mutex_*_proxy_lock()
By changing futex_lock_pi() to use rt_mutex_*_proxy_lock(), all wait_list
modifications are done under both hb->lock and wait_lock.

This closes the obvious interleave pattern between futex_lock_pi() and
futex_unlock_pi(), but not entirely. See below:
Before:

futex_lock_pi()                 futex_unlock_pi()
  unlock hb->lock

                                  lock hb->lock
                                  unlock hb->lock

                                  lock rt_mutex->wait_lock
                                  unlock rt_mutex->wait_lock
                                    -EAGAIN

  lock rt_mutex->wait_lock
  list_add
  unlock rt_mutex->wait_lock

  schedule()

  lock rt_mutex->wait_lock
  list_del
  unlock rt_mutex->wait_lock

                                  <idem>
                                    -EAGAIN

  lock hb->lock

After:

futex_lock_pi()                 futex_unlock_pi()

  lock hb->lock
  lock rt_mutex->wait_lock
  list_add
  unlock rt_mutex->wait_lock
  unlock hb->lock

  schedule()
                                  lock hb->lock
                                  unlock hb->lock
  lock hb->lock
  lock rt_mutex->wait_lock
  list_del
  unlock rt_mutex->wait_lock

                                  lock rt_mutex->wait_lock
                                  unlock rt_mutex->wait_lock
                                    -EAGAIN

  unlock hb->lock
It does, however, solve the earlier starvation/live-lock scenario which the
-EAGAIN introduced: unlike the before scenario, where the -EAGAIN happens
while futex_unlock_pi() holds no locks, in the after scenario it happens
while futex_unlock_pi() holds hb->lock, so the retry is serialized on that
lock.
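
In code form, the "after" ordering corresponds to the condensed sketch
below. This is a reading aid, not the verbatim kernel code: lock_pi_sketch()
is a hypothetical name, the pi_state fixup and full error paths are omitted,
and the helpers called are the real ones from the diff further down.

/*
 * Hypothetical condensed sketch of the reworked blocking path;
 * lock_pi_sketch() is not a real kernel function. The pi_state fixup
 * and full error handling are omitted.
 */
static int lock_pi_sketch(struct futex_q *q, struct futex_hash_bucket *hb,
			  struct hrtimer_sleeper *to)
{
	struct rt_mutex_waiter rt_waiter;
	int ret;

	/* Enqueue on the hash bucket; hb->lock remains held. */
	__queue_me(q, hb);

	/*
	 * list_add on the rt_mutex wait list under both hb->lock and
	 * rt_mutex->wait_lock; a return of 1 means the lock was
	 * acquired outright.
	 */
	rt_mutex_init_waiter(&rt_waiter);
	ret = rt_mutex_start_proxy_lock(&q->pi_state->pi_mutex, &rt_waiter,
					current);
	if (ret)
		return ret == 1 ? 0 : ret;

	/* Only now drop hb->lock, then block without holding it. */
	spin_unlock(q->lock_ptr);
	if (to)
		hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
	ret = rt_mutex_wait_proxy_lock(&q->pi_state->pi_mutex, to, &rt_waiter);

	/*
	 * Re-take hb->lock before the list_del in the cleanup, so the hb
	 * and rt_mutex wait lists stay consistent even on signal/timeout.
	 * A false return from cleanup means we acquired the lock after all.
	 */
	spin_lock(q->lock_ptr);
	if (ret && !rt_mutex_cleanup_proxy_lock(&q->pi_state->pi_mutex, &rt_waiter))
		ret = 0;

	return ret;
}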
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: juri.lelli@arm.com
Cc: bigeasy@linutronix.de
Cc: xlpang@redhat.com
Cc: rostedt@goodmis.org
Cc: mathieu.desnoyers@efficios.com
Cc: jdesfossez@efficios.com
Cc: dvhart@infradead.org
Cc: bristot@redhat.com
Link: http://lkml.kernel.org/r/20170322104152.062785528@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/futex.c')
-rw-r--r--   kernel/futex.c   77
1 file changed, 55 insertions(+), 22 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 1cd8df7d3a43..eecce7bab86d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2099,20 +2099,7 @@ queue_unlock(struct futex_hash_bucket *hb)
 	hb_waiters_dec(hb);
 }
 
-/**
- * queue_me() - Enqueue the futex_q on the futex_hash_bucket
- * @q:	The futex_q to enqueue
- * @hb:	The destination hash bucket
- *
- * The hb->lock must be held by the caller, and is released here. A call to
- * queue_me() is typically paired with exactly one call to unqueue_me(). The
- * exceptions involve the PI related operations, which may use unqueue_me_pi()
- * or nothing if the unqueue is done as part of the wake process and the unqueue
- * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
- * an example).
- */
-static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
-	__releases(&hb->lock)
+static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 {
 	int prio;
 
@@ -2129,6 +2116,24 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 	plist_node_init(&q->list, prio);
 	plist_add(&q->list, &hb->chain);
 	q->task = current;
+}
+
+/**
+ * queue_me() - Enqueue the futex_q on the futex_hash_bucket
+ * @q:	The futex_q to enqueue
+ * @hb:	The destination hash bucket
+ *
+ * The hb->lock must be held by the caller, and is released here. A call to
+ * queue_me() is typically paired with exactly one call to unqueue_me(). The
+ * exceptions involve the PI related operations, which may use unqueue_me_pi()
+ * or nothing if the unqueue is done as part of the wake process and the unqueue
+ * state is implicit in the state of woken task (see futex_wait_requeue_pi() for
+ * an example).
+ */
+static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
+	__releases(&hb->lock)
+{
+	__queue_me(q, hb);
 	spin_unlock(&hb->lock);
 }
 
@@ -2587,6 +2592,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 {
 	struct hrtimer_sleeper timeout, *to = NULL;
 	struct futex_pi_state *pi_state = NULL;
+	struct rt_mutex_waiter rt_waiter;
 	struct futex_hash_bucket *hb;
 	struct futex_q q = futex_q_init;
 	int res, ret;
@@ -2639,25 +2645,52 @@ retry_private:
 		}
 	}
 
+	WARN_ON(!q.pi_state);
+
 	/*
 	 * Only actually queue now that the atomic ops are done:
 	 */
-	queue_me(&q, hb);
+	__queue_me(&q, hb);
 
-	WARN_ON(!q.pi_state);
-	/*
-	 * Block on the PI mutex:
-	 */
-	if (!trylock) {
-		ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to);
-	} else {
+	if (trylock) {
 		ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
 		/* Fixup the trylock return value: */
 		ret = ret ? 0 : -EWOULDBLOCK;
+		goto no_block;
+	}
+
+	/*
+	 * We must add ourselves to the rt_mutex waitlist while holding hb->lock
+	 * such that the hb and rt_mutex wait lists match.
+	 */
+	rt_mutex_init_waiter(&rt_waiter);
+	ret = rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
+	if (ret) {
+		if (ret == 1)
+			ret = 0;
+
+		goto no_block;
 	}
 
+	spin_unlock(q.lock_ptr);
+
+	if (unlikely(to))
+		hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
+
+	ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
+
 	spin_lock(q.lock_ptr);
 	/*
+	 * If we failed to acquire the lock (signal/timeout), we must
+	 * first acquire the hb->lock before removing the lock from the
+	 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex
+	 * wait lists consistent.
+	 */
+	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
+		ret = 0;
+
+no_block:
+	/*
 	 * Fixup the pi_state owner and possibly acquire the lock if we
 	 * haven't already.
 	 */
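
For context, here is a minimal userspace sketch of the PI-futex protocol
that futex_lock_pi()/futex_unlock_pi() serve, assuming the futex word
convention documented in futex(2) (0 means unlocked, otherwise the owner's
TID, with FUTEX_WAITERS set by the kernel under contention) and glibc's
gettid(). None of this is part of the commit; it only shows how the kernel
slow paths above get entered.

#define _GNU_SOURCE
#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

static uint32_t futex_word;	/* 0 == unlocked, else TID of the owner */

static long sys_futex(uint32_t *uaddr, int op, uint32_t val)
{
	return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

static void pi_lock(void)
{
	uint32_t zero = 0;

	/* Fast path: CAS 0 -> our TID entirely in userspace. */
	if (!__atomic_compare_exchange_n(&futex_word, &zero, gettid(), 0,
					 __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
		/* Contended: block in futex_lock_pi() in the kernel. */
		sys_futex(&futex_word, FUTEX_LOCK_PI, 0);
}

static void pi_unlock(void)
{
	uint32_t tid = gettid();

	/* Fast path: CAS our TID -> 0 if there are no waiters. */
	if (!__atomic_compare_exchange_n(&futex_word, &tid, 0, 0,
					 __ATOMIC_RELEASE, __ATOMIC_RELAXED))
		/* FUTEX_WAITERS was set: hand off via futex_unlock_pi(). */
		sys_futex(&futex_word, FUTEX_UNLOCK_PI, 0);
}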