author		Peter Zijlstra <a.p.zijlstra@chello.nl>		2009-01-12 08:01:47 -0500
committer	Ingo Molnar <mingo@elte.hu>			2009-01-14 12:09:02 -0500
commit		0d66bf6d3514b35eb6897629059443132992dbd7 (patch)
tree		a47ee0fc3299361cf3b222c8242741adfedaab74
parent		41719b03091911028116155deddc5eedf8c45e37 (diff)
mutex: implement adaptive spinning
Change mutex contention behaviour such that it will sometimes busy wait on
acquisition - moving its behaviour closer to that of spinlocks.

This concept got ported to mainline from the -rt tree, where it was originally
implemented for rtmutexes by Steven Rostedt, based on work by Gregory Haskins.

Testing with Ingo's test-mutex application (http://lkml.org/lkml/2006/1/8/50)
gave a 345% boost for VFS scalability on my testbox:

 # ./test-mutex-shm V 16 10 | grep "^avg ops"
 avg ops/sec:               296604

 # ./test-mutex-shm V 16 10 | grep "^avg ops"
 avg ops/sec:                85870

The key criterion for the busy wait is that the lock owner has to be running
on a (different) cpu. The idea is that as long as the owner is running, there
is a fair chance it'll release the lock soon, and thus we'll be better off
spinning instead of blocking/scheduling.

Since regular mutexes (as opposed to rtmutexes) do not atomically track the
owner, we add the owner in a non-atomic fashion and deal with the races in
the slowpath.

Furthermore, to ease testing of the performance impact of this new code, there
is a means to disable this behaviour at runtime (without having to reboot the
system), when scheduler debugging is enabled (CONFIG_SCHED_DEBUG=y), by
issuing the following command:

 # echo NO_OWNER_SPIN > /debug/sched_features

This command re-enables spinning again (this is also the default):

 # echo OWNER_SPIN > /debug/sched_features

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
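For orientation, the acquisition-side heuristic this patch adds to __mutex_lock_common()
condenses to roughly the following sketch. It is distilled from the kernel/mutex.c hunk
below (lockdep and owner bookkeeping omitted) and is not meant as standalone-buildable
code:

 /* Optimistic spin before taking the blocking slowpath (SMP, !DEBUG_MUTEXES). */
 for (;;) {
 	struct thread_info *owner;

 	if (!list_empty(&lock->wait_list))
 		break;				/* waiters already queued: go sleep */

 	owner = ACCESS_ONCE(lock->owner);	/* speculative, non-atomic snapshot */
 	if (owner && !mutex_spin_on_owner(lock, owner))
 		break;				/* owner stopped running: go sleep */

 	if (!owner && (need_resched() || rt_task(task)))
 		break;				/* don't live-lock against a preempted owner */

 	if (atomic_cmpxchg(&lock->count, 1, 0) == 1)
 		return 0;			/* acquired while spinning */

 	cpu_relax();				/* compiler barrier; re-load loop state */
 }
 /* otherwise fall through to the existing sleep/wakeup slowpath */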
-rw-r--r--	include/linux/mutex.h		  5
-rw-r--r--	include/linux/sched.h		  1
-rw-r--r--	kernel/mutex-debug.c		  9
-rw-r--r--	kernel/mutex-debug.h		 18
-rw-r--r--	kernel/mutex.c			115
-rw-r--r--	kernel/mutex.h			 22
-rw-r--r--	kernel/sched.c			 61
-rw-r--r--	kernel/sched_features.h		  1
8 files changed, 201 insertions(+), 31 deletions(-)
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 7a0e5c4f8072..3069ec7e0ab8 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -50,8 +50,10 @@ struct mutex {
 	atomic_t		count;
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
-#ifdef CONFIG_DEBUG_MUTEXES
+#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
 	struct thread_info	*owner;
+#endif
+#ifdef CONFIG_DEBUG_MUTEXES
 	const char		*name;
 	void			*magic;
 #endif
@@ -68,7 +70,6 @@ struct mutex_waiter {
 	struct list_head	list;
 	struct task_struct	*task;
 #ifdef CONFIG_DEBUG_MUTEXES
-	struct mutex		*lock;
 	void			*magic;
 #endif
 };
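For readability, this is how the owner-related part of struct mutex reads after the
first hunk above; fields outside the hunk are unchanged and omitted, and the trailing
comment is editorial annotation rather than part of the patch:

 struct mutex {
 	atomic_t		count;
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
 #if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
 	struct thread_info	*owner;	/* now maintained on SMP too, for the spin heuristic */
 #endif
 #ifdef CONFIG_DEBUG_MUTEXES
 	const char		*name;
 	void			*magic;
 #endif
 	/* ... */
 };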
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f0b372cfa6f..c34b137cd1e5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -330,6 +330,7 @@ extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void __schedule(void);
 asmlinkage void schedule(void);
+extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
 
 struct nsproxy;
 struct user_namespace;
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index 1d94160eb532..50d022e5a560 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -26,11 +26,6 @@
 /*
  * Must be called with lock->wait_lock held.
  */
-void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner)
-{
-	lock->owner = new_owner;
-}
-
 void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
 {
 	memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
@@ -59,7 +54,6 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 
 	/* Mark the current thread as blocked on the lock: */
 	ti->task->blocked_on = waiter;
-	waiter->lock = lock;
 }
 
 void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
@@ -82,7 +76,7 @@ void debug_mutex_unlock(struct mutex *lock)
 	DEBUG_LOCKS_WARN_ON(lock->magic != lock);
 	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
 	DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
-	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
+	mutex_clear_owner(lock);
 }
 
 void debug_mutex_init(struct mutex *lock, const char *name,
@@ -95,7 +89,6 @@ void debug_mutex_init(struct mutex *lock, const char *name,
 	debug_check_no_locks_freed((void *)lock, sizeof(*lock));
 	lockdep_init_map(&lock->dep_map, name, key, 0);
 #endif
-	lock->owner = NULL;
 	lock->magic = lock;
 }
 
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index babfbdfc534b..6b2d735846a5 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -13,14 +13,6 @@
 /*
  * This must be called with lock->wait_lock held.
  */
-extern void
-debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner);
-
-static inline void debug_mutex_clear_owner(struct mutex *lock)
-{
-	lock->owner = NULL;
-}
-
 extern void debug_mutex_lock_common(struct mutex *lock,
 				    struct mutex_waiter *waiter);
 extern void debug_mutex_wake_waiter(struct mutex *lock,
@@ -35,6 +27,16 @@ extern void debug_mutex_unlock(struct mutex *lock);
 extern void debug_mutex_init(struct mutex *lock, const char *name,
 			     struct lock_class_key *key);
 
+static inline void mutex_set_owner(struct mutex *lock)
+{
+	lock->owner = current_thread_info();
+}
+
+static inline void mutex_clear_owner(struct mutex *lock)
+{
+	lock->owner = NULL;
+}
+
 #define spin_lock_mutex(lock, flags)			\
 	do {						\
 		struct mutex *l = container_of(lock, struct mutex, wait_lock); \
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 524ffc33dc05..ff42e975590c 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -10,6 +10,11 @@
  * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and
  * David Howells for suggestions and improvements.
  *
+ *  - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline
+ *    from the -rt tree, where it was originally implemented for rtmutexes
+ *    by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale
+ *    and Sven Dietrich.
+ *
  * Also see Documentation/mutex-design.txt.
  */
 #include <linux/mutex.h>
@@ -46,6 +51,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 	atomic_set(&lock->count, 1);
 	spin_lock_init(&lock->wait_lock);
 	INIT_LIST_HEAD(&lock->wait_list);
+	mutex_clear_owner(lock);
 
 	debug_mutex_init(lock, name, key);
 }
@@ -91,6 +97,7 @@ void inline __sched mutex_lock(struct mutex *lock)
 	 * 'unlocked' into 'locked' state.
 	 */
 	__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
+	mutex_set_owner(lock);
 }
 
 EXPORT_SYMBOL(mutex_lock);
@@ -115,6 +122,14 @@ void __sched mutex_unlock(struct mutex *lock)
 	 * The unlocking fastpath is the 0->1 transition from 'locked'
 	 * into 'unlocked' state:
 	 */
+#ifndef CONFIG_DEBUG_MUTEXES
+	/*
+	 * When debugging is enabled we must not clear the owner before time,
+	 * the slow path will always be taken, and that clears the owner field
+	 * after verifying that it was indeed current.
+	 */
+	mutex_clear_owner(lock);
+#endif
 	__mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
 }
 
@@ -132,10 +147,71 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	unsigned long flags;
 
 	preempt_disable();
+	mutex_acquire(&lock->dep_map, subclass, 0, ip);
+#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES)
+	/*
+	 * Optimistic spinning.
+	 *
+	 * We try to spin for acquisition when we find that there are no
+	 * pending waiters and the lock owner is currently running on a
+	 * (different) CPU.
+	 *
+	 * The rationale is that if the lock owner is running, it is likely to
+	 * release the lock soon.
+	 *
+	 * Since this needs the lock owner, and this mutex implementation
+	 * doesn't track the owner atomically in the lock field, we need to
+	 * track it non-atomically.
+	 *
+	 * We can't do this for DEBUG_MUTEXES because that relies on wait_lock
+	 * to serialize everything.
+	 */
+
+	for (;;) {
+		struct thread_info *owner;
+
+		/*
+		 * If there are pending waiters, join them.
+		 */
+		if (!list_empty(&lock->wait_list))
+			break;
+
+		/*
+		 * If there's an owner, wait for it to either
+		 * release the lock or go to sleep.
+		 */
+		owner = ACCESS_ONCE(lock->owner);
+		if (owner && !mutex_spin_on_owner(lock, owner))
+			break;
+
+		/*
+		 * When there's no owner, we might have preempted between the
+		 * owner acquiring the lock and setting the owner field. If
+		 * we're an RT task that will live-lock because we won't let
+		 * the owner complete.
+		 */
+		if (!owner && (need_resched() || rt_task(task)))
+			break;
+
+		if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
+			lock_acquired(&lock->dep_map, ip);
+			mutex_set_owner(lock);
+			preempt_enable();
+			return 0;
+		}
+
+		/*
+		 * The cpu_relax() call is a compiler barrier which forces
+		 * everything in this loop to be re-loaded. We don't need
+		 * memory barriers as we'll eventually observe the right
+		 * values at the cost of a few extra spins.
+		 */
+		cpu_relax();
+	}
+#endif
 	spin_lock_mutex(&lock->wait_lock, flags);
 
 	debug_mutex_lock_common(lock, &waiter);
-	mutex_acquire(&lock->dep_map, subclass, 0, ip);
 	debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
 
 	/* add waiting tasks to the end of the waitqueue (FIFO): */
@@ -185,8 +261,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 done:
 	lock_acquired(&lock->dep_map, ip);
 	/* got the lock - rejoice! */
-	mutex_remove_waiter(lock, &waiter, task_thread_info(task));
-	debug_mutex_set_owner(lock, task_thread_info(task));
+	mutex_remove_waiter(lock, &waiter, current_thread_info());
+	mutex_set_owner(lock);
 
 	/* set it to 0 if there are no waiters left: */
 	if (likely(list_empty(&lock->wait_list)))
@@ -222,7 +298,8 @@ int __sched
 mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
-	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_);
+	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
+				   subclass, _RET_IP_);
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -260,8 +337,6 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
 		wake_up_process(waiter->task);
 	}
 
-	debug_mutex_clear_owner(lock);
-
 	spin_unlock_mutex(&lock->wait_lock, flags);
 }
 
@@ -298,18 +373,30 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count);
  */
 int __sched mutex_lock_interruptible(struct mutex *lock)
 {
+	int ret;
+
 	might_sleep();
-	return __mutex_fastpath_lock_retval
+	ret = __mutex_fastpath_lock_retval
 			(&lock->count, __mutex_lock_interruptible_slowpath);
+	if (!ret)
+		mutex_set_owner(lock);
+
+	return ret;
 }
 
 EXPORT_SYMBOL(mutex_lock_interruptible);
 
 int __sched mutex_lock_killable(struct mutex *lock)
 {
+	int ret;
+
 	might_sleep();
-	return __mutex_fastpath_lock_retval
+	ret = __mutex_fastpath_lock_retval
 			(&lock->count, __mutex_lock_killable_slowpath);
+	if (!ret)
+		mutex_set_owner(lock);
+
+	return ret;
 }
 EXPORT_SYMBOL(mutex_lock_killable);
 
@@ -352,9 +439,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
 
 	prev = atomic_xchg(&lock->count, -1);
 	if (likely(prev == 1)) {
-		debug_mutex_set_owner(lock, current_thread_info());
+		mutex_set_owner(lock);
 		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
 	}
+
 	/* Set it back to 0 if there are no waiters: */
 	if (likely(list_empty(&lock->wait_list)))
 		atomic_set(&lock->count, 0);
@@ -380,8 +468,13 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
  */
 int __sched mutex_trylock(struct mutex *lock)
 {
-	return __mutex_fastpath_trylock(&lock->count,
-					__mutex_trylock_slowpath);
+	int ret;
+
+	ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath);
+	if (ret)
+		mutex_set_owner(lock);
+
+	return ret;
 }
 
 EXPORT_SYMBOL(mutex_trylock);
diff --git a/kernel/mutex.h b/kernel/mutex.h
index a075dafbb290..67578ca48f94 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -16,8 +16,26 @@
 #define mutex_remove_waiter(lock, waiter, ti) \
 		__list_del((waiter)->list.prev, (waiter)->list.next)
 
-#define debug_mutex_set_owner(lock, new_owner)		do { } while (0)
-#define debug_mutex_clear_owner(lock)			do { } while (0)
+#ifdef CONFIG_SMP
+static inline void mutex_set_owner(struct mutex *lock)
+{
+	lock->owner = current_thread_info();
+}
+
+static inline void mutex_clear_owner(struct mutex *lock)
+{
+	lock->owner = NULL;
+}
+#else
+static inline void mutex_set_owner(struct mutex *lock)
+{
+}
+
+static inline void mutex_clear_owner(struct mutex *lock)
+{
+}
+#endif
+
 #define debug_mutex_wake_waiter(lock, waiter)		do { } while (0)
 #define debug_mutex_free_waiter(waiter)			do { } while (0)
 #define debug_mutex_add_waiter(lock, waiter, ti)	do { } while (0)
diff --git a/kernel/sched.c b/kernel/sched.c
index b001c133c359..589e7308c615 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4614,6 +4614,67 @@ need_resched:
 }
 EXPORT_SYMBOL(schedule);
 
+#ifdef CONFIG_SMP
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable.
+ */
+int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
+{
+	unsigned int cpu;
+	struct rq *rq;
+
+	if (!sched_feat(OWNER_SPIN))
+		return 0;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/*
+	 * Need to access the cpu field knowing that
+	 * DEBUG_PAGEALLOC could have unmapped it if
+	 * the mutex owner just released it and exited.
+	 */
+	if (probe_kernel_address(&owner->cpu, cpu))
+		goto out;
+#else
+	cpu = owner->cpu;
+#endif
+
+	/*
+	 * Even if the access succeeded (likely case),
+	 * the cpu field may no longer be valid.
+	 */
+	if (cpu >= nr_cpumask_bits)
+		goto out;
+
+	/*
+	 * We need to validate that we can do a
+	 * get_cpu() and that we have the percpu area.
+	 */
+	if (!cpu_online(cpu))
+		goto out;
+
+	rq = cpu_rq(cpu);
+
+	for (;;) {
+		/*
+		 * Owner changed, break to re-assess state.
+		 */
+		if (lock->owner != owner)
+			break;
+
+		/*
+		 * Is that owner really running on that cpu?
+		 */
+		if (task_thread_info(rq->curr) != owner || need_resched())
+			return 0;
+
+		cpu_relax();
+	}
+out:
+	return 1;
+}
+#endif
+
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption
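As used by the spin loop in the kernel/mutex.c hunk above, the return value of
mutex_spin_on_owner() is worth spelling out: 1 (including the goto-out paths taken when
the speculative cpu probe fails or the cpu went offline) means "owner changed or went
away, re-evaluate and possibly keep spinning", while 0 means "stop spinning and block".
The caller-side use, repeated from the earlier hunk:

 	owner = ACCESS_ONCE(lock->owner);
 	if (owner && !mutex_spin_on_owner(lock, owner))
 		break;	/* returned 0: owner not on a cpu, or we need to resched -> sleep */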
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index da5d93b5d2c6..07bc02e99ab1 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -13,3 +13,4 @@ SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
 SCHED_FEAT(ASYM_EFF_LOAD, 1)
 SCHED_FEAT(WAKEUP_OVERLAP, 0)
 SCHED_FEAT(LAST_BUDDY, 1)
+SCHED_FEAT(OWNER_SPIN, 1)