path: root/kernel/sched.c
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2009-01-12 08:01:47 -0500
committer	Ingo Molnar <mingo@elte.hu>	2009-01-14 12:09:02 -0500
commit	0d66bf6d3514b35eb6897629059443132992dbd7 (patch)
tree	a47ee0fc3299361cf3b222c8242741adfedaab74 /kernel/sched.c
parent	41719b03091911028116155deddc5eedf8c45e37 (diff)
mutex: implement adaptive spinning
Change mutex contention behaviour such that it will sometimes busy wait on acquisition - moving its behaviour closer to that of spinlocks.

This concept got ported to mainline from the -rt tree, where it was originally implemented for rtmutexes by Steven Rostedt, based on work by Gregory Haskins.

Testing with Ingo's test-mutex application (http://lkml.org/lkml/2006/1/8/50) gave a 345% boost for VFS scalability on my testbox:

 # ./test-mutex-shm V 16 10 | grep "^avg ops"
 avg ops/sec:               296604

 # ./test-mutex-shm V 16 10 | grep "^avg ops"
 avg ops/sec:                85870

The key criterion for the busy wait is that the lock owner has to be running on a (different) cpu. The idea is that as long as the owner is running, there is a fair chance it'll release the lock soon, and thus we'll be better off spinning instead of blocking/scheduling.

Since regular mutexes (as opposed to rtmutexes) do not atomically track the owner, we add the owner in a non-atomic fashion and deal with the races in the slowpath.

Furthermore, to ease testing of the performance impact of this new code, there is a means to disable this behaviour at runtime (without having to reboot the system) when scheduler debugging is enabled (CONFIG_SCHED_DEBUG=y), by issuing the following command:

 # echo NO_OWNER_SPIN > /debug/sched_features

This command re-enables spinning again (this is also the default):

 # echo OWNER_SPIN > /debug/sched_features

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
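The mutex_spin_on_owner() helper added below is only half of the mechanism: its caller lives in the mutex lock slowpath in kernel/mutex.c, which this diff (limited to kernel/sched.c) does not show. As a rough orientation, the slowpath might drive the helper along the following lines; this is a minimal sketch, and details such as the exact loop structure and the mutex_set_owner() helper are assumptions drawn from the companion mutex.c change, not part of this patch:

	/*
	 * Sketch only (assumed caller in kernel/mutex.c, not in this diff).
	 */
	for (;;) {
		struct thread_info *owner;

		/*
		 * If there is an owner and it is running on another cpu,
		 * spin until it either releases the lock or schedules out.
		 */
		owner = ACCESS_ONCE(lock->owner);
		if (owner && !mutex_spin_on_owner(lock, owner))
			break;			/* owner stopped running: block */

		/* The lock may have been released while we spun: try to take it. */
		if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
			lock_acquired(&lock->dep_map, ip);
			mutex_set_owner(lock);	/* assumed helper from mutex.c */
			preempt_enable();
			return 0;
		}

		/*
		 * No owner recorded (the non-atomic owner update raced with us)
		 * and we need to reschedule: give up spinning and block.
		 */
		if (!owner && (need_resched() || rt_task(current)))
			break;

		/*
		 * cpu_relax() is a compiler barrier, so lock->owner and
		 * lock->count are re-loaded on every iteration.
		 */
		cpu_relax();
	}

The contract that matters for the caller is the return value: mutex_spin_on_owner() returns 0 when spinning is pointless (feature disabled, owner not actually running on its cpu, or we need to reschedule), telling the caller to fall back to blocking, and returns 1 when the situation changed (owner changed or its task went away), telling the caller to retry the acquisition.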
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	61
1 files changed, 61 insertions, 0 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index b001c133c359..589e7308c615 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4614,6 +4614,67 @@ need_resched:
 }
 EXPORT_SYMBOL(schedule);
 
+#ifdef CONFIG_SMP
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable.
+ */
+int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
+{
+	unsigned int cpu;
+	struct rq *rq;
+
+	if (!sched_feat(OWNER_SPIN))
+		return 0;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/*
+	 * Need to access the cpu field knowing that
+	 * DEBUG_PAGEALLOC could have unmapped it if
+	 * the mutex owner just released it and exited.
+	 */
+	if (probe_kernel_address(&owner->cpu, cpu))
+		goto out;
+#else
+	cpu = owner->cpu;
+#endif
+
+	/*
+	 * Even if the access succeeded (likely case),
+	 * the cpu field may no longer be valid.
+	 */
+	if (cpu >= nr_cpumask_bits)
+		goto out;
+
+	/*
+	 * We need to validate that we can do a
+	 * get_cpu() and that we have the percpu area.
+	 */
+	if (!cpu_online(cpu))
+		goto out;
+
+	rq = cpu_rq(cpu);
+
+	for (;;) {
+		/*
+		 * Owner changed, break to re-assess state.
+		 */
+		if (lock->owner != owner)
+			break;
+
+		/*
+		 * Is that owner really running on that cpu?
+		 */
+		if (task_thread_info(rq->curr) != owner || need_resched())
+			return 0;
+
+		cpu_relax();
+	}
+out:
+	return 1;
+}
+#endif
+
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption