author     Peter Zijlstra <a.p.zijlstra@chello.nl>    2009-01-12 08:01:47 -0500
committer  Ingo Molnar <mingo@elte.hu>                2009-01-14 12:09:02 -0500
commit     0d66bf6d3514b35eb6897629059443132992dbd7
tree       a47ee0fc3299361cf3b222c8242741adfedaab74   /kernel/sched.c
parent     41719b03091911028116155deddc5eedf8c45e37
mutex: implement adaptive spinning
Change mutex contention behaviour such that it will sometimes busy wait on
acquisition - moving its behaviour closer to that of spinlocks.
This concept got ported to mainline from the -rt tree, where it was originally
implemented for rtmutexes by Steven Rostedt, based on work by Gregory Haskins.
Testing with Ingo's test-mutex application (http://lkml.org/lkml/2006/1/8/50)
gave a 345% boost for VFS scalability on my testbox:
# ./test-mutex-shm V 16 10 | grep "^avg ops"
avg ops/sec: 296604

# ./test-mutex V 16 10 | grep "^avg ops"
avg ops/sec: 85870
The key criterion for the busy wait is that the lock owner has to be running on
a (different) cpu. The idea is that as long as the owner is running, there is a
fair chance it'll release the lock soon, and thus we'll be better off spinning
instead of blocking/scheduling.
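The kernel/mutex.c side of this change sits outside the diffstat below; roughly,
the slowpath spins along these lines (a simplified sketch, omitting the lockdep
and RT-task details, not the verbatim hunk):

	for (;;) {
		struct thread_info *owner;

		/*
		 * If there's an owner, spin while it keeps running;
		 * mutex_spin_on_owner() returns 0 once the owner
		 * schedules out (or we need to), at which point we
		 * fall back to the usual blocking path.
		 */
		owner = ACCESS_ONCE(lock->owner);
		if (owner && !mutex_spin_on_owner(lock, owner))
			break;

		/* The lock looks free: try to grab it (1 == unlocked). */
		if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
			preempt_enable();
			return 0;
		}

		/*
		 * cpu_relax() is also a compiler barrier, forcing
		 * lock->owner and lock->count to be re-loaded on the
		 * next iteration.
		 */
		cpu_relax();
	}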
Since regular mutexes (as opposed to rtmutexes) do not atomically track the
owner, we add the owner in a non-atomic fashion and deal with the races in
the slowpath.
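Concretely (again a sketch, as the include/linux/mutex.h and kernel/mutex.c
hunks are outside this diffstat): struct mutex grows a plain pointer field that
is written without atomics or barriers, so a spinner can observe a stale or
NULL owner, and those races are resolved by the re-checks in
mutex_spin_on_owner() shown in the diff below:

	struct mutex {
		atomic_t		count;
		spinlock_t		wait_lock;
		struct list_head	wait_list;
		struct thread_info	*owner;	/* advisory only, no atomics */
	};

	/* on acquisition (fast or slow path): */
	lock->owner = task_thread_info(current);

	/* on release: */
	lock->owner = NULL;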
Furthermore, to ease testing of the performance impact of this new code, there
is a means to disable the behaviour at runtime (without having to reboot the
system), when scheduler debugging is enabled (CONFIG_SCHED_DEBUG=y), by issuing
the following command:

# echo NO_OWNER_SPIN > /debug/sched_features

This command re-enables spinning (this is also the default):

# echo OWNER_SPIN > /debug/sched_features
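For reference, the knob is an ordinary scheduler feature bit, which in kernels
of this era amounts to a one-line declaration in kernel/sched_features.h
(sketched here; the hunk itself is outside this diffstat):

	/* Spin on a mutex while its owner runs on another CPU (default: on). */
	SCHED_FEAT(OWNER_SPIN, 1)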
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')

 kernel/sched.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+), 0 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index b001c133c359..589e7308c615 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4614,6 +4614,67 @@ need_resched:
 }
 EXPORT_SYMBOL(schedule);
 
+#ifdef CONFIG_SMP
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable.
+ */
+int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
+{
+	unsigned int cpu;
+	struct rq *rq;
+
+	if (!sched_feat(OWNER_SPIN))
+		return 0;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/*
+	 * Need to access the cpu field knowing that
+	 * DEBUG_PAGEALLOC could have unmapped it if
+	 * the mutex owner just released it and exited.
+	 */
+	if (probe_kernel_address(&owner->cpu, cpu))
+		goto out;
+#else
+	cpu = owner->cpu;
+#endif
+
+	/*
+	 * Even if the access succeeded (likely case),
+	 * the cpu field may no longer be valid.
+	 */
+	if (cpu >= nr_cpumask_bits)
+		goto out;
+
+	/*
+	 * We need to validate that we can do a
+	 * get_cpu() and that we have the percpu area.
+	 */
+	if (!cpu_online(cpu))
+		goto out;
+
+	rq = cpu_rq(cpu);
+
+	for (;;) {
+		/*
+		 * Owner changed, break to re-assess state.
+		 */
+		if (lock->owner != owner)
+			break;
+
+		/*
+		 * Is that owner really running on that cpu?
+		 */
+		if (task_thread_info(rq->curr) != owner || need_resched())
+			return 0;
+
+		cpu_relax();
+	}
+out:
+	return 1;
+}
+#endif
+
 #ifdef CONFIG_PREEMPT
 /*
  * this is the entry point to schedule() from in-kernel preemption