BKL: revert back to the old spinlock implementation

The generic semaphore rewrite had a huge performance regression on AIM7 (and potentially other BKL-heavy benchmarks) because the generic semaphores had been rewritten to be simple to understand and fair. The latter, in particular, turns a semaphore-based BKL implementation into a mess of scheduling.

The attempt to fix the performance regression failed miserably (see the previous commit 00b41ec2611dc98f87f30753ee00a53db648d662 'Revert "semaphore: fix"'), and so for now the simple and sane approach is to instead just go back to the old spinlock-based BKL implementation that never had any issues like this.

This patch also has the advantage of being reported to fix the regression completely according to Yanmin Zhang, unlike the semaphore hack which still left a couple percentage point regression.

As a spinlock, the BKL obviously has the potential to be a latency issue, but it's not really any different from any other spinlock in that respect. We do want to get rid of the BKL asap, but that has been the plan for several years.

These days, the biggest users are in the tty layer (open/release in particular) and Alan holds out some hope:

  "tty release is probably a few months away from getting cured - I'm afraid it will almost certainly be the very last user of the BKL in tty to get fixed as it depends on everything else being sanely locked."

so while we're not there yet, we do have a plan of action.

Tested-by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Alexander Viro <viro@ftp.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-05-10 23:58:02 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-05-10 23:58:02 -0400
commit: 8e3e076c5a78519a9f64cd384e8f18bc21882ce0 (patch)
tree: f032258fde3aa4771e86bf4552fe4530c221dec3 /kernel
parent: 00b41ec2611dc98f87f30753ee00a53db648d662 (diff)
1 files changed, 4 insertions, 23 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 58fb8af15776..c51b6565e07c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4567,8 +4567,6 @@ EXPORT_SYMBOL(schedule);
 asmlinkage void __sched preempt_schedule(void)
 {
        struct thread_info *ti = current_thread_info();
-        struct task_struct *task = current;
-        int saved_lock_depth;
        /*
         * If there is a non-zero preempt_count or interrupts are disabled,
@@ -4579,16 +4577,7 @@ asmlinkage void __sched preempt_schedule(void)
        do {
                add_preempt_count(PREEMPT_ACTIVE);
-                /*
-                 * We keep the big kernel semaphore locked, but we
-                 * clear ->lock_depth so that schedule() doesnt
-                 * auto-release the semaphore:
-                 */
-                saved_lock_depth = task->lock_depth;
-                task->lock_depth = -1;
                schedule();
-                task->lock_depth = saved_lock_depth;
                sub_preempt_count(PREEMPT_ACTIVE);
                /*
@@ -4609,26 +4598,15 @@ EXPORT_SYMBOL(preempt_schedule);
 asmlinkage void __sched preempt_schedule_irq(void)
 {
        struct thread_info *ti = current_thread_info();
-        struct task_struct *task = current;
-        int saved_lock_depth;
        /* Catch callers which need to be fixed */
        BUG_ON(ti->preempt_count || !irqs_disabled());
        do {
                add_preempt_count(PREEMPT_ACTIVE);
-                /*
-                 * We keep the big kernel semaphore locked, but we
-                 * clear ->lock_depth so that schedule() doesnt
-                 * auto-release the semaphore:
-                 */
-                saved_lock_depth = task->lock_depth;
-                task->lock_depth = -1;
                local_irq_enable();
                schedule();
                local_irq_disable();
-                task->lock_depth = saved_lock_depth;
                sub_preempt_count(PREEMPT_ACTIVE);
                /*
@@ -5853,8 +5831,11 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
        spin_unlock_irqrestore(&rq->lock, flags);
        /* Set the preempt count _outside_ the spinlocks! */
+#if defined(CONFIG_PREEMPT)
+        task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
+#else
        task_thread_info(idle)->preempt_count = 0;
+#endif
        /*
         * The idle tasks have their own, simple scheduling class:
         */
author	Linus Torvalds <torvalds@linux-foundation.org>	2008-05-10 23:58:02 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-05-10 23:58:02 -0400
commit	8e3e076c5a78519a9f64cd384e8f18bc21882ce0 (patch)
tree	f032258fde3aa4771e86bf4552fe4530c221dec3 /kernel
parent	00b41ec2611dc98f87f30753ee00a53db648d662 (diff)

diff --git a/kernel/sched.c b/kernel/sched.c
index 58fb8af15776..c51b6565e07c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4567,8 +4567,6 @@ EXPORT_SYMBOL(schedule);
4567	asmlinkage void __sched preempt_schedule(void)	4567	asmlinkage void __sched preempt_schedule(void)
4568	{	4568	{
4569	struct thread_info *ti = current_thread_info();	4569	struct thread_info *ti = current_thread_info();
4570	struct task_struct *task = current;
4571	int saved_lock_depth;
4572		4570
4573	/*	4571	/*
4574	* If there is a non-zero preempt_count or interrupts are disabled,	4572	* If there is a non-zero preempt_count or interrupts are disabled,
@@ -4579,16 +4577,7 @@ asmlinkage void __sched preempt_schedule(void)
4579		4577
4580	do {	4578	do {
4581	add_preempt_count(PREEMPT_ACTIVE);	4579	add_preempt_count(PREEMPT_ACTIVE);
4582
4583	/*
4584	* We keep the big kernel semaphore locked, but we
4585	* clear ->lock_depth so that schedule() doesnt
4586	* auto-release the semaphore:
4587	*/
4588	saved_lock_depth = task->lock_depth;
4589	task->lock_depth = -1;
4590	schedule();	4580	schedule();
4591	task->lock_depth = saved_lock_depth;
4592	sub_preempt_count(PREEMPT_ACTIVE);	4581	sub_preempt_count(PREEMPT_ACTIVE);
4593		4582
4594	/*	4583	/*
@@ -4609,26 +4598,15 @@ EXPORT_SYMBOL(preempt_schedule);
4609	asmlinkage void __sched preempt_schedule_irq(void)	4598	asmlinkage void __sched preempt_schedule_irq(void)
4610	{	4599	{
4611	struct thread_info *ti = current_thread_info();	4600	struct thread_info *ti = current_thread_info();
4612	struct task_struct *task = current;
4613	int saved_lock_depth;
4614		4601
4615	/* Catch callers which need to be fixed */	4602	/* Catch callers which need to be fixed */
4616	BUG_ON(ti->preempt_count || !irqs_disabled());	4603	BUG_ON(ti->preempt_count || !irqs_disabled());
4617		4604
4618	do {	4605	do {
4619	add_preempt_count(PREEMPT_ACTIVE);	4606	add_preempt_count(PREEMPT_ACTIVE);
4620
4621	/*
4622	* We keep the big kernel semaphore locked, but we
4623	* clear ->lock_depth so that schedule() doesnt
4624	* auto-release the semaphore:
4625	*/
4626	saved_lock_depth = task->lock_depth;
4627	task->lock_depth = -1;
4628	local_irq_enable();	4607	local_irq_enable();
4629	schedule();	4608	schedule();
4630	local_irq_disable();	4609	local_irq_disable();
4631	task->lock_depth = saved_lock_depth;
4632	sub_preempt_count(PREEMPT_ACTIVE);	4610	sub_preempt_count(PREEMPT_ACTIVE);
4633		4611
4634	/*	4612	/*
@@ -5853,8 +5831,11 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5853	spin_unlock_irqrestore(&rq->lock, flags);	5831	spin_unlock_irqrestore(&rq->lock, flags);
5854		5832
5855	/* Set the preempt count _outside_ the spinlocks! */	5833	/* Set the preempt count _outside_ the spinlocks! */
		5834	#if defined(CONFIG_PREEMPT)
		5835	task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
		5836	#else
5856	task_thread_info(idle)->preempt_count = 0;	5837	task_thread_info(idle)->preempt_count = 0;
5857		5838	#endif
5858	/*	5839	/*
5859	* The idle tasks have their own, simple scheduling class:	5840	* The idle tasks have their own, simple scheduling class:
5860	*/	5841	*/