diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-01-12 08:01:47 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-14 12:09:02 -0500 |
commit | 0d66bf6d3514b35eb6897629059443132992dbd7 (patch) | |
tree | a47ee0fc3299361cf3b222c8242741adfedaab74 /kernel/mutex.c | |
parent | 41719b03091911028116155deddc5eedf8c45e37 (diff) |
mutex: implement adaptive spinning
Change mutex contention behaviour such that it will sometimes busy wait on
acquisition - moving its behaviour closer to that of spinlocks.
This concept got ported to mainline from the -rt tree, where it was originally
implemented for rtmutexes by Steven Rostedt, based on work by Gregory Haskins.
Testing with Ingo's test-mutex application (http://lkml.org/lkml/2006/1/8/50)
gave a 345% boost for VFS scalability on my testbox:
# ./test-mutex-shm V 16 10 | grep "^avg ops"
avg ops/sec: 296604
# ./test-mutex-shm V 16 10 | grep "^avg ops"
avg ops/sec: 85870
The key criteria for the busy wait is that the lock owner has to be running on
a (different) cpu. The idea is that as long as the owner is running, there is a
fair chance it'll release the lock soon, and thus we'll be better off spinning
instead of blocking/scheduling.
Since regular mutexes (as opposed to rtmutexes) do not atomically track the
owner, we add the owner in a non-atomic fashion and deal with the races in
the slowpath.
Furthermore, to ease testing of the performance impact of this new code,
this behaviour can be disabled at runtime (without having to reboot
the system), when scheduler debugging is enabled (CONFIG_SCHED_DEBUG=y),
by issuing the following command:
# echo NO_OWNER_SPIN > /debug/sched_features
This command re-enables spinning again (this is also the default):
# echo OWNER_SPIN > /debug/sched_features
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/mutex.c')
-rw-r--r-- | kernel/mutex.c | 115 |
1 files changed, 104 insertions, 11 deletions
diff --git a/kernel/mutex.c b/kernel/mutex.c index 524ffc33dc05..ff42e975590c 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -10,6 +10,11 @@ | |||
10 | * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and | 10 | * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and |
11 | * David Howells for suggestions and improvements. | 11 | * David Howells for suggestions and improvements. |
12 | * | 12 | * |
13 | * - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline | ||
14 | * from the -rt tree, where it was originally implemented for rtmutexes | ||
15 | * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale | ||
16 | * and Sven Dietrich. | ||
17 | * | ||
13 | * Also see Documentation/mutex-design.txt. | 18 | * Also see Documentation/mutex-design.txt. |
14 | */ | 19 | */ |
15 | #include <linux/mutex.h> | 20 | #include <linux/mutex.h> |
@@ -46,6 +51,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) | |||
46 | atomic_set(&lock->count, 1); | 51 | atomic_set(&lock->count, 1); |
47 | spin_lock_init(&lock->wait_lock); | 52 | spin_lock_init(&lock->wait_lock); |
48 | INIT_LIST_HEAD(&lock->wait_list); | 53 | INIT_LIST_HEAD(&lock->wait_list); |
54 | mutex_clear_owner(lock); | ||
49 | 55 | ||
50 | debug_mutex_init(lock, name, key); | 56 | debug_mutex_init(lock, name, key); |
51 | } | 57 | } |
@@ -91,6 +97,7 @@ void inline __sched mutex_lock(struct mutex *lock) | |||
91 | * 'unlocked' into 'locked' state. | 97 | * 'unlocked' into 'locked' state. |
92 | */ | 98 | */ |
93 | __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); | 99 | __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); |
100 | mutex_set_owner(lock); | ||
94 | } | 101 | } |
95 | 102 | ||
96 | EXPORT_SYMBOL(mutex_lock); | 103 | EXPORT_SYMBOL(mutex_lock); |
@@ -115,6 +122,14 @@ void __sched mutex_unlock(struct mutex *lock) | |||
115 | * The unlocking fastpath is the 0->1 transition from 'locked' | 122 | * The unlocking fastpath is the 0->1 transition from 'locked' |
116 | * into 'unlocked' state: | 123 | * into 'unlocked' state: |
117 | */ | 124 | */ |
125 | #ifndef CONFIG_DEBUG_MUTEXES | ||
126 | /* | ||
127 | * When debugging is enabled we must not clear the owner before time, | ||
128 | * the slow path will always be taken, and that clears the owner field | ||
129 | * after verifying that it was indeed current. | ||
130 | */ | ||
131 | mutex_clear_owner(lock); | ||
132 | #endif | ||
118 | __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); | 133 | __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); |
119 | } | 134 | } |
120 | 135 | ||
@@ -132,10 +147,71 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
132 | unsigned long flags; | 147 | unsigned long flags; |
133 | 148 | ||
134 | preempt_disable(); | 149 | preempt_disable(); |
150 | mutex_acquire(&lock->dep_map, subclass, 0, ip); | ||
151 | #if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) | ||
152 | /* | ||
153 | * Optimistic spinning. | ||
154 | * | ||
155 | * We try to spin for acquisition when we find that there are no | ||
156 | * pending waiters and the lock owner is currently running on a | ||
157 | * (different) CPU. | ||
158 | * | ||
159 | * The rationale is that if the lock owner is running, it is likely to | ||
160 | * release the lock soon. | ||
161 | * | ||
162 | * Since this needs the lock owner, and this mutex implementation | ||
163 | * doesn't track the owner atomically in the lock field, we need to | ||
164 | * track it non-atomically. | ||
165 | * | ||
166 | * We can't do this for DEBUG_MUTEXES because that relies on wait_lock | ||
167 | * to serialize everything. | ||
168 | */ | ||
169 | |||
170 | for (;;) { | ||
171 | struct thread_info *owner; | ||
172 | |||
173 | /* | ||
174 | * If there are pending waiters, join them. | ||
175 | */ | ||
176 | if (!list_empty(&lock->wait_list)) | ||
177 | break; | ||
178 | |||
179 | /* | ||
180 | * If there's an owner, wait for it to either | ||
181 | * release the lock or go to sleep. | ||
182 | */ | ||
183 | owner = ACCESS_ONCE(lock->owner); | ||
184 | if (owner && !mutex_spin_on_owner(lock, owner)) | ||
185 | break; | ||
186 | |||
187 | /* | ||
188 | * When there's no owner, we might have preempted between the | ||
189 | * owner acquiring the lock and setting the owner field. If | ||
190 | * we're an RT task that will live-lock because we won't let | ||
191 | * the owner complete. | ||
192 | */ | ||
193 | if (!owner && (need_resched() || rt_task(task))) | ||
194 | break; | ||
195 | |||
196 | if (atomic_cmpxchg(&lock->count, 1, 0) == 1) { | ||
197 | lock_acquired(&lock->dep_map, ip); | ||
198 | mutex_set_owner(lock); | ||
199 | preempt_enable(); | ||
200 | return 0; | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * The cpu_relax() call is a compiler barrier which forces | ||
205 | * everything in this loop to be re-loaded. We don't need | ||
206 | * memory barriers as we'll eventually observe the right | ||
207 | * values at the cost of a few extra spins. | ||
208 | */ | ||
209 | cpu_relax(); | ||
210 | } | ||
211 | #endif | ||
135 | spin_lock_mutex(&lock->wait_lock, flags); | 212 | spin_lock_mutex(&lock->wait_lock, flags); |
136 | 213 | ||
137 | debug_mutex_lock_common(lock, &waiter); | 214 | debug_mutex_lock_common(lock, &waiter); |
138 | mutex_acquire(&lock->dep_map, subclass, 0, ip); | ||
139 | debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); | 215 | debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); |
140 | 216 | ||
141 | /* add waiting tasks to the end of the waitqueue (FIFO): */ | 217 | /* add waiting tasks to the end of the waitqueue (FIFO): */ |
@@ -185,8 +261,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
185 | done: | 261 | done: |
186 | lock_acquired(&lock->dep_map, ip); | 262 | lock_acquired(&lock->dep_map, ip); |
187 | /* got the lock - rejoice! */ | 263 | /* got the lock - rejoice! */ |
188 | mutex_remove_waiter(lock, &waiter, task_thread_info(task)); | 264 | mutex_remove_waiter(lock, &waiter, current_thread_info()); |
189 | debug_mutex_set_owner(lock, task_thread_info(task)); | 265 | mutex_set_owner(lock); |
190 | 266 | ||
191 | /* set it to 0 if there are no waiters left: */ | 267 | /* set it to 0 if there are no waiters left: */ |
192 | if (likely(list_empty(&lock->wait_list))) | 268 | if (likely(list_empty(&lock->wait_list))) |
@@ -222,7 +298,8 @@ int __sched | |||
222 | mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) | 298 | mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) |
223 | { | 299 | { |
224 | might_sleep(); | 300 | might_sleep(); |
225 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_); | 301 | return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, |
302 | subclass, _RET_IP_); | ||
226 | } | 303 | } |
227 | 304 | ||
228 | EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); | 305 | EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); |
@@ -260,8 +337,6 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) | |||
260 | wake_up_process(waiter->task); | 337 | wake_up_process(waiter->task); |
261 | } | 338 | } |
262 | 339 | ||
263 | debug_mutex_clear_owner(lock); | ||
264 | |||
265 | spin_unlock_mutex(&lock->wait_lock, flags); | 340 | spin_unlock_mutex(&lock->wait_lock, flags); |
266 | } | 341 | } |
267 | 342 | ||
@@ -298,18 +373,30 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count); | |||
298 | */ | 373 | */ |
299 | int __sched mutex_lock_interruptible(struct mutex *lock) | 374 | int __sched mutex_lock_interruptible(struct mutex *lock) |
300 | { | 375 | { |
376 | int ret; | ||
377 | |||
301 | might_sleep(); | 378 | might_sleep(); |
302 | return __mutex_fastpath_lock_retval | 379 | ret = __mutex_fastpath_lock_retval |
303 | (&lock->count, __mutex_lock_interruptible_slowpath); | 380 | (&lock->count, __mutex_lock_interruptible_slowpath); |
381 | if (!ret) | ||
382 | mutex_set_owner(lock); | ||
383 | |||
384 | return ret; | ||
304 | } | 385 | } |
305 | 386 | ||
306 | EXPORT_SYMBOL(mutex_lock_interruptible); | 387 | EXPORT_SYMBOL(mutex_lock_interruptible); |
307 | 388 | ||
308 | int __sched mutex_lock_killable(struct mutex *lock) | 389 | int __sched mutex_lock_killable(struct mutex *lock) |
309 | { | 390 | { |
391 | int ret; | ||
392 | |||
310 | might_sleep(); | 393 | might_sleep(); |
311 | return __mutex_fastpath_lock_retval | 394 | ret = __mutex_fastpath_lock_retval |
312 | (&lock->count, __mutex_lock_killable_slowpath); | 395 | (&lock->count, __mutex_lock_killable_slowpath); |
396 | if (!ret) | ||
397 | mutex_set_owner(lock); | ||
398 | |||
399 | return ret; | ||
313 | } | 400 | } |
314 | EXPORT_SYMBOL(mutex_lock_killable); | 401 | EXPORT_SYMBOL(mutex_lock_killable); |
315 | 402 | ||
@@ -352,9 +439,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) | |||
352 | 439 | ||
353 | prev = atomic_xchg(&lock->count, -1); | 440 | prev = atomic_xchg(&lock->count, -1); |
354 | if (likely(prev == 1)) { | 441 | if (likely(prev == 1)) { |
355 | debug_mutex_set_owner(lock, current_thread_info()); | 442 | mutex_set_owner(lock); |
356 | mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); | 443 | mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); |
357 | } | 444 | } |
445 | |||
358 | /* Set it back to 0 if there are no waiters: */ | 446 | /* Set it back to 0 if there are no waiters: */ |
359 | if (likely(list_empty(&lock->wait_list))) | 447 | if (likely(list_empty(&lock->wait_list))) |
360 | atomic_set(&lock->count, 0); | 448 | atomic_set(&lock->count, 0); |
@@ -380,8 +468,13 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) | |||
380 | */ | 468 | */ |
381 | int __sched mutex_trylock(struct mutex *lock) | 469 | int __sched mutex_trylock(struct mutex *lock) |
382 | { | 470 | { |
383 | return __mutex_fastpath_trylock(&lock->count, | 471 | int ret; |
384 | __mutex_trylock_slowpath); | 472 | |
473 | ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath); | ||
474 | if (ret) | ||
475 | mutex_set_owner(lock); | ||
476 | |||
477 | return ret; | ||
385 | } | 478 | } |
386 | 479 | ||
387 | EXPORT_SYMBOL(mutex_trylock); | 480 | EXPORT_SYMBOL(mutex_trylock); |