author     Thomas Gleixner <tglx@linutronix.de>    2014-06-11 14:44:04 -0400
committer  Thomas Gleixner <tglx@linutronix.de>    2014-06-16 04:03:09 -0400
commit     27e35715df54cbc4f2d044f681802ae30479e7fb (patch)
tree       deacb08195cf3cc2ea7480f103991bd4b3e4aa6b /kernel
parent     82084984383babe728e6e3c9a8e5c46278091315 (diff)
rtmutex: Plug slow unlock race
When the rtmutex fast path is enabled the slow unlock function can
create the following situation:

    spin_lock(foo->m->wait_lock);
    foo->m->owner = NULL;
                            rt_mutex_lock(foo->m); <-- fast path
                            free = atomic_dec_and_test(foo->refcnt);
                            rt_mutex_unlock(foo->m); <-- fast path
                            if (free)
                                    kfree(foo);
    spin_unlock(foo->m->wait_lock); <--- Use after free.
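
The window exists because the fast path never takes wait_lock at all:
with debugging disabled, lock and unlock are a single cmpxchg on
m->owner. A rough sketch of that fast path, simplified from
kernel/locking/rtmutex.c (debug hooks and the real slow path plumbing
are elided, so the signatures here are not verbatim):

    /*
     * Simplified sketch only -- the real helpers take slow path
     * callbacks and extra arguments.  The point: once the slow unlock
     * above has published m->owner = NULL, another task can run both
     * of these to completion without ever touching m->wait_lock.
     */
    void rt_mutex_lock(struct rt_mutex *lock)
    {
            /* NULL -> current: uncontended acquire, no wait_lock */
            if (likely(rt_mutex_cmpxchg(lock, NULL, current)))
                    return;
            rt_mutex_slowlock(lock);        /* contended case */
    }

    void rt_mutex_unlock(struct rt_mutex *lock)
    {
            /* current -> NULL: no waiters bit set, no wait_lock */
            if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
                    return;
            rt_mutex_slowunlock(lock);      /* contended case */
    }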
Plug the race by changing the slow unlock to the following scheme:

    while (!rt_mutex_has_waiters(m)) {
            /* Clear the waiters bit in m->owner */
            clear_rt_mutex_waiters(m);
            owner = rt_mutex_owner(m);
            spin_unlock(m->wait_lock);
            if (cmpxchg(m->owner, owner, 0) == owner)
                    return;
            spin_lock(m->wait_lock);
    }
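
In the patch below this scheme is split in two: the loop body becomes
unlock_rt_mutex_safe() and the retry loop sits in rt_mutex_slowunlock().
Condensed from the diff (not verbatim, locking annotations dropped):

    static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
    {
            struct task_struct *owner = rt_mutex_owner(lock);

            clear_rt_mutex_waiters(lock);           /* clear the waiters bit */
            raw_spin_unlock(&lock->wait_lock);      /* open the race window ... */
            /* ... and only succeed if nobody set the waiters bit meanwhile */
            return rt_mutex_cmpxchg(lock, owner, NULL);
    }

    /* in rt_mutex_slowunlock(), entered with lock->wait_lock held: */
    while (!rt_mutex_has_waiters(lock)) {
            if (unlock_rt_mutex_safe(lock))         /* drops lock->wait_lock */
                    return;
            raw_spin_lock(&lock->wait_lock);        /* relock and try again */
    }
    wakeup_next_waiter(lock);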
So in case of a new waiter incoming while the owner tries the slow
path unlock we have two situations:

    unlock(wait_lock);
                            lock(wait_lock);
    cmpxchg(p, owner, 0) == owner
                            mark_rt_mutex_waiters(lock);
                            acquire(lock);

Or:

    unlock(wait_lock);
                            lock(wait_lock);
                            mark_rt_mutex_waiters(lock);
    cmpxchg(p, owner, 0) != owner
                            enqueue_waiter();
                            unlock(wait_lock);
    lock(wait_lock);
    wakeup_next_waiter();
    unlock(wait_lock);
                            lock(wait_lock);
                            acquire(lock);
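
The second case is safe because mark_rt_mutex_waiters() ORs
RT_MUTEX_HAS_WAITERS into lock->owner itself, so the unlocker's cmpxchg
against the plain owner pointer can no longer succeed and the unlock
falls back into the slow path. The helper, lightly reconstructed from
the hunk context in the diff below:

    static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
    {
            unsigned long owner, *p = (unsigned long *) &lock->owner;

            /* Sticky: once set, a bare owner-pointer cmpxchg must fail */
            do {
                    owner = *p;
            } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
    }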
If the fast path is disabled, then the simple

    m->owner = NULL;
    unlock(m->wait_lock);

is sufficient as all access to m->owner is serialized via
m->wait_lock.
Also document and clarify the wakeup_next_waiter function as suggested
by Oleg Nesterov.
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140611183852.937945560@linutronix.de
Cc: stable@vger.kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/locking/rtmutex.c   115
1 file changed, 109 insertions(+), 6 deletions(-)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index a8a83a22bb91..fc605941b9b8 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -83,6 +83,47 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 		owner = *p;
 	} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
 }
+
+/*
+ * Safe fastpath aware unlock:
+ * 1) Clear the waiters bit
+ * 2) Drop lock->wait_lock
+ * 3) Try to unlock the lock with cmpxchg
+ */
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+	__releases(lock->wait_lock)
+{
+	struct task_struct *owner = rt_mutex_owner(lock);
+
+	clear_rt_mutex_waiters(lock);
+	raw_spin_unlock(&lock->wait_lock);
+	/*
+	 * If a new waiter comes in between the unlock and the cmpxchg
+	 * we have two situations:
+	 *
+	 * unlock(wait_lock);
+	 *					lock(wait_lock);
+	 * cmpxchg(p, owner, 0) == owner
+	 *					mark_rt_mutex_waiters(lock);
+	 *					acquire(lock);
+	 * or:
+	 *
+	 * unlock(wait_lock);
+	 *					lock(wait_lock);
+	 *					mark_rt_mutex_waiters(lock);
+	 *
+	 * cmpxchg(p, owner, 0) != owner
+	 *					enqueue_waiter();
+	 *					unlock(wait_lock);
+	 * lock(wait_lock);
+	 * wake waiter();
+	 * unlock(wait_lock);
+	 *					lock(wait_lock);
+	 *					acquire(lock);
+	 */
+	return rt_mutex_cmpxchg(lock, owner, NULL);
+}
+
 #else
 # define rt_mutex_cmpxchg(l,c,n)	(0)
 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
@@ -90,6 +131,17 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 	lock->owner = (struct task_struct *)
 			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
 }
+
+/*
+ * Simple slow path only version: lock->owner is protected by lock->wait_lock.
+ */
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+	__releases(lock->wait_lock)
+{
+	lock->owner = NULL;
+	raw_spin_unlock(&lock->wait_lock);
+	return true;
+}
 #endif
 
 static inline int
@@ -650,7 +702,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 /*
  * Wake up the next waiter on the lock.
  *
- * Remove the top waiter from the current tasks waiter list and wake it up.
+ * Remove the top waiter from the current tasks pi waiter list and
+ * wake it up.
  *
  * Called with lock->wait_lock held.
  */
@@ -671,10 +724,23 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
 	 */
 	rt_mutex_dequeue_pi(current, waiter);
 
-	rt_mutex_set_owner(lock, NULL);
+	/*
+	 * As we are waking up the top waiter, and the waiter stays
+	 * queued on the lock until it gets the lock, this lock
+	 * obviously has waiters. Just set the bit here and this has
+	 * the added benefit of forcing all new tasks into the
+	 * slow path making sure no task of lower priority than
+	 * the top waiter can steal this lock.
+	 */
+	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
 
 	raw_spin_unlock_irqrestore(&current->pi_lock, flags);
 
+	/*
+	 * It's safe to dereference waiter as it cannot go away as
+	 * long as we hold lock->wait_lock. The waiter task needs to
+	 * acquire it in order to dequeue the waiter.
+	 */
 	wake_up_process(waiter->task);
 }
 
@@ -928,12 +994,49 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
 
 	rt_mutex_deadlock_account_unlock(current);
 
-	if (!rt_mutex_has_waiters(lock)) {
-		lock->owner = NULL;
-		raw_spin_unlock(&lock->wait_lock);
-		return;
+	/*
+	 * We must be careful here if the fast path is enabled. If we
+	 * have no waiters queued we cannot set owner to NULL here
+	 * because of:
+	 *
+	 *	foo->lock->owner = NULL;
+	 *			rtmutex_lock(foo->lock);   <- fast path
+	 *			free = atomic_dec_and_test(foo->refcnt);
+	 *			rtmutex_unlock(foo->lock); <- fast path
+	 *			if (free)
+	 *				kfree(foo);
+	 *	raw_spin_unlock(foo->lock->wait_lock);
+	 *
+	 * So for the fastpath enabled kernel:
+	 *
+	 * Nothing can set the waiters bit as long as we hold
+	 * lock->wait_lock. So we do the following sequence:
+	 *
+	 *	owner = rt_mutex_owner(lock);
+	 *	clear_rt_mutex_waiters(lock);
+	 *	raw_spin_unlock(&lock->wait_lock);
+	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
+	 *		return;
+	 *	goto retry;
+	 *
+	 * The fastpath disabled variant is simple as all access to
+	 * lock->owner is serialized by lock->wait_lock:
+	 *
+	 *	lock->owner = NULL;
+	 *	raw_spin_unlock(&lock->wait_lock);
+	 */
+	while (!rt_mutex_has_waiters(lock)) {
+		/* Drops lock->wait_lock ! */
+		if (unlock_rt_mutex_safe(lock) == true)
+			return;
+		/* Relock the rtmutex and try again */
+		raw_spin_lock(&lock->wait_lock);
 	}
 
+	/*
+	 * The wakeup next waiter path does not suffer from the above
+	 * race. See the comments there.
+	 */
 	wakeup_next_waiter(lock);
 
 	raw_spin_unlock(&lock->wait_lock);