4 files changed, 95 insertions, 81 deletions
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 6a6409adc564..e856218da90d 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -186,17 +186,6 @@ config PREEMPT
          Say Y here if you are building a kernel for a desktop, embedded
          or real-time system.  Say N if you are unsure.
-config PREEMPT_BKL
-        bool "Preempt The Big Kernel Lock"
-        depends on PREEMPT
-        default y
-        help
-          This option reduces the latency of the kernel by making the
-          big kernel lock preemptible.
-          Say Y here if you are building a kernel for a desktop system.
-          Say N if you are unsure.
 config MN10300_CURRENT_IN_E2
        bool "Hold current task address in E2 register"
        default y
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 897f723bd222..181006cc94a0 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -72,6 +72,14 @@
 #define in_softirq()            (softirq_count())
 #define in_interrupt()          (irq_count())
+#if defined(CONFIG_PREEMPT)
+# define PREEMPT_INATOMIC_BASE kernel_locked()
+# define PREEMPT_CHECK_OFFSET 1
+#else
+# define PREEMPT_INATOMIC_BASE 0
+# define PREEMPT_CHECK_OFFSET 0
+#endif
 /*
 * Are we running in atomic context?  WARNING: this macro cannot
 * always detect atomic context; in particular, it cannot know about
@@ -79,17 +87,11 @@
 * used in the general case to determine whether sleeping is possible.
 * Do not use in_atomic() in driver code.
 */
-#define in_atomic()             ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
+#define in_atomic()     ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE)
-#ifdef CONFIG_PREEMPT
-# define PREEMPT_CHECK_OFFSET 1
-#else
-# define PREEMPT_CHECK_OFFSET 0
-#endif
 /*
 * Check whether we were atomic before we did preempt_disable():
- * (used by the scheduler)
+ * (used by the scheduler, *after* releasing the kernel lock)
 */
 #define in_atomic_preempt_off() \
                ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
diff --git a/kernel/sched.c b/kernel/sched.c
index 58fb8af15776..c51b6565e07c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4567,8 +4567,6 @@ EXPORT_SYMBOL(schedule);
 asmlinkage void __sched preempt_schedule(void)
 {
        struct thread_info *ti = current_thread_info();
-        struct task_struct *task = current;
-        int saved_lock_depth;
        /*
         * If there is a non-zero preempt_count or interrupts are disabled,
@@ -4579,16 +4577,7 @@ asmlinkage void __sched preempt_schedule(void)
        do {
                add_preempt_count(PREEMPT_ACTIVE);
-                /*
-                 * We keep the big kernel semaphore locked, but we
-                 * clear ->lock_depth so that schedule() doesnt
-                 * auto-release the semaphore:
-                 */
-                saved_lock_depth = task->lock_depth;
-                task->lock_depth = -1;
                schedule();
-                task->lock_depth = saved_lock_depth;
                sub_preempt_count(PREEMPT_ACTIVE);
                /*
@@ -4609,26 +4598,15 @@ EXPORT_SYMBOL(preempt_schedule);
 asmlinkage void __sched preempt_schedule_irq(void)
 {
        struct thread_info *ti = current_thread_info();
-        struct task_struct *task = current;
-        int saved_lock_depth;
        /* Catch callers which need to be fixed */
        BUG_ON(ti->preempt_count || !irqs_disabled());
        do {
                add_preempt_count(PREEMPT_ACTIVE);
-                /*
-                 * We keep the big kernel semaphore locked, but we
-                 * clear ->lock_depth so that schedule() doesnt
-                 * auto-release the semaphore:
-                 */
-                saved_lock_depth = task->lock_depth;
-                task->lock_depth = -1;
                local_irq_enable();
                schedule();
                local_irq_disable();
-                task->lock_depth = saved_lock_depth;
                sub_preempt_count(PREEMPT_ACTIVE);
                /*
@@ -5853,8 +5831,11 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
        spin_unlock_irqrestore(&rq->lock, flags);
        /* Set the preempt count _outside_ the spinlocks! */
+#if defined(CONFIG_PREEMPT)
+        task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
+#else
        task_thread_info(idle)->preempt_count = 0;
+#endif
        /*
         * The idle tasks have their own, simple scheduling class:
         */
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index cd3e82530b03..01a3c22c1b5a 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -11,79 +11,121 @@
 #include <linux/semaphore.h>
 /*
- * The 'big kernel semaphore'
+ * The 'big kernel lock'
 *
- * This mutex is taken and released recursively by lock_kernel()
+ * This spinlock is taken and released recursively by lock_kernel()
 * and unlock_kernel().  It is transparently dropped and reacquired
 * over schedule().  It is used to protect legacy code that hasn't
 * been migrated to a proper locking design yet.
 *
- * Note: code locked by this semaphore will only be serialized against
- * other code using the same locking facility. The code guarantees that
- * the task remains on the same CPU.
- *
 * Don't use in new code.
 */
-static DECLARE_MUTEX(kernel_sem);
+static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag);
 /*
- * Re-acquire the kernel semaphore.
+ * Acquire/release the underlying lock from the scheduler.
 *
- * This function is called with preemption off.
+ * This is called with preemption disabled, and should
+ * return an error value if it cannot get the lock and
+ * TIF_NEED_RESCHED gets set.
 *
- * We are executing in schedule() so the code must be extremely careful
+ * If it successfully gets the lock, it should increment
- * about recursion, both due to the down() and due to the enabling of
+ * the preemption count like any spinlock does.
- * preemption. schedule() will re-check the preemption flag after
+ *
- * reacquiring the semaphore.
+ * (This works on UP too - _raw_spin_trylock will never
+ * return false in that case)
 */
 int __lockfunc __reacquire_kernel_lock(void)
 {
-        struct task_struct *task = current;
+        while (!_raw_spin_trylock(&kernel_flag)) {
-        int saved_lock_depth = task->lock_depth;
+                if (test_thread_flag(TIF_NEED_RESCHED))
+                        return -EAGAIN;
-        BUG_ON(saved_lock_depth < 0);
+                cpu_relax();
+        }
-        task->lock_depth = -1;
-        preempt_enable_no_resched();
-        down(&kernel_sem);
        preempt_disable();
-        task->lock_depth = saved_lock_depth;
        return 0;
 }
 void __lockfunc __release_kernel_lock(void)
 {
-        up(&kernel_sem);
+        _raw_spin_unlock(&kernel_flag);
+        preempt_enable_no_resched();
 }
 /*
- * Getting the big kernel semaphore.
+ * These are the BKL spinlocks - we try to be polite about preemption.
+ * If SMP is not on (i.e. UP preemption), this all goes away because the
+ * _raw_spin_trylock() will always succeed.
 */
-void __lockfunc lock_kernel(void)
+#ifdef CONFIG_PREEMPT
+static inline void __lock_kernel(void)
 {
-        struct task_struct *task = current;
+        preempt_disable();
-        int depth = task->lock_depth + 1;
+        if (unlikely(!_raw_spin_trylock(&kernel_flag))) {
+                /*
+                 * If preemption was disabled even before this
+                 * was called, there's nothing we can be polite
+                 * about - just spin.
+                 */
+                if (preempt_count() > 1) {
+                        _raw_spin_lock(&kernel_flag);
+                        return;
+                }
-        if (likely(!depth))
                /*
-                 * No recursion worries - we set up lock_depth _after_
+                 * Otherwise, let's wait for the kernel lock
+                 * with preemption enabled..
                 */
-                down(&kernel_sem);
+                do {
+                        preempt_enable();
+                        while (spin_is_locked(&kernel_flag))
+                                cpu_relax();
+                        preempt_disable();
+                } while (!_raw_spin_trylock(&kernel_flag));
+        }
+}
-        task->lock_depth = depth;
+#else
+/*
+ * Non-preemption case - just get the spinlock
+ */
+static inline void __lock_kernel(void)
+{
+        _raw_spin_lock(&kernel_flag);
 }
+#endif
-void __lockfunc unlock_kernel(void)
+static inline void __unlock_kernel(void)
 {
-        struct task_struct *task = current;
+        /*
+         * the BKL is not covered by lockdep, so we open-code the
+         * unlocking sequence (and thus avoid the dep-chain ops):
+         */
+        _raw_spin_unlock(&kernel_flag);
+        preempt_enable();
+}
-        BUG_ON(task->lock_depth < 0);
+/*
+ * Getting the big kernel lock.
+ *
+ * This cannot happen asynchronously, so we only need to
+ * worry about other CPUs.
+ */
+void __lockfunc lock_kernel(void)
+{
+        int depth = current->lock_depth+1;
+        if (likely(!depth))
+                __lock_kernel();
+        current->lock_depth = depth;
+}
-        if (likely(--task->lock_depth < 0))
+void __lockfunc unlock_kernel(void)
-                up(&kernel_sem);
+{
+        BUG_ON(current->lock_depth < 0);
+        if (likely(--current->lock_depth < 0))
+                __unlock_kernel();
 }
 EXPORT_SYMBOL(lock_kernel);

diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 6a6409adc564..e856218da90d 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig
@@ -186,17 +186,6 @@ config PREEMPT
186	Say Y here if you are building a kernel for a desktop, embedded	186	Say Y here if you are building a kernel for a desktop, embedded
187	or real-time system. Say N if you are unsure.	187	or real-time system. Say N if you are unsure.
188		188
189	config PREEMPT_BKL
190	bool "Preempt The Big Kernel Lock"
191	depends on PREEMPT
192	default y
193	help
194	This option reduces the latency of the kernel by making the
195	big kernel lock preemptible.
196
197	Say Y here if you are building a kernel for a desktop system.
198	Say N if you are unsure.
199
200	config MN10300_CURRENT_IN_E2	189	config MN10300_CURRENT_IN_E2
201	bool "Hold current task address in E2 register"	190	bool "Hold current task address in E2 register"
202	default y	191	default y


diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 897f723bd222..181006cc94a0 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h
@@ -72,6 +72,14 @@
72	#define in_softirq() (softirq_count())	72	#define in_softirq() (softirq_count())
73	#define in_interrupt() (irq_count())	73	#define in_interrupt() (irq_count())
74		74
		75	#if defined(CONFIG_PREEMPT)
		76	# define PREEMPT_INATOMIC_BASE kernel_locked()
		77	# define PREEMPT_CHECK_OFFSET 1
		78	#else
		79	# define PREEMPT_INATOMIC_BASE 0
		80	# define PREEMPT_CHECK_OFFSET 0
		81	#endif
		82
75	/*	83	/*
76	* Are we running in atomic context? WARNING: this macro cannot	84	* Are we running in atomic context? WARNING: this macro cannot
77	* always detect atomic context; in particular, it cannot know about	85	* always detect atomic context; in particular, it cannot know about
@@ -79,17 +87,11 @@
79	* used in the general case to determine whether sleeping is possible.	87	* used in the general case to determine whether sleeping is possible.
80	* Do not use in_atomic() in driver code.	88	* Do not use in_atomic() in driver code.
81	*/	89	*/
82	#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)	90	#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE)
83
84	#ifdef CONFIG_PREEMPT
85	# define PREEMPT_CHECK_OFFSET 1
86	#else
87	# define PREEMPT_CHECK_OFFSET 0
88	#endif
89		91
90	/*	92	/*
91	* Check whether we were atomic before we did preempt_disable():	93	* Check whether we were atomic before we did preempt_disable():
92	* (used by the scheduler)	94	* (used by the scheduler, *after* releasing the kernel lock)
93	*/	95	*/
94	#define in_atomic_preempt_off() \	96	#define in_atomic_preempt_off() \
95	((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)	97	((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)


diff --git a/kernel/sched.c b/kernel/sched.c index 58fb8af15776..c51b6565e07c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c
@@ -4567,8 +4567,6 @@ EXPORT_SYMBOL(schedule);
4567	asmlinkage void __sched preempt_schedule(void)	4567	asmlinkage void __sched preempt_schedule(void)
4568	{	4568	{
4569	struct thread_info *ti = current_thread_info();	4569	struct thread_info *ti = current_thread_info();
4570	struct task_struct *task = current;
4571	int saved_lock_depth;
4572		4570
4573	/*	4571	/*
4574	* If there is a non-zero preempt_count or interrupts are disabled,	4572	* If there is a non-zero preempt_count or interrupts are disabled,
@@ -4579,16 +4577,7 @@ asmlinkage void __sched preempt_schedule(void)
4579		4577
4580	do {	4578	do {
4581	add_preempt_count(PREEMPT_ACTIVE);	4579	add_preempt_count(PREEMPT_ACTIVE);
4582
4583	/*
4584	* We keep the big kernel semaphore locked, but we
4585	* clear ->lock_depth so that schedule() doesnt
4586	* auto-release the semaphore:
4587	*/
4588	saved_lock_depth = task->lock_depth;
4589	task->lock_depth = -1;
4590	schedule();	4580	schedule();
4591	task->lock_depth = saved_lock_depth;
4592	sub_preempt_count(PREEMPT_ACTIVE);	4581	sub_preempt_count(PREEMPT_ACTIVE);
4593		4582
4594	/*	4583	/*
@@ -4609,26 +4598,15 @@ EXPORT_SYMBOL(preempt_schedule);
4609	asmlinkage void __sched preempt_schedule_irq(void)	4598	asmlinkage void __sched preempt_schedule_irq(void)
4610	{	4599	{
4611	struct thread_info *ti = current_thread_info();	4600	struct thread_info *ti = current_thread_info();
4612	struct task_struct *task = current;
4613	int saved_lock_depth;
4614		4601
4615	/* Catch callers which need to be fixed */	4602	/* Catch callers which need to be fixed */
4616	BUG_ON(ti->preempt_count || !irqs_disabled());	4603	BUG_ON(ti->preempt_count || !irqs_disabled());
4617		4604
4618	do {	4605	do {
4619	add_preempt_count(PREEMPT_ACTIVE);	4606	add_preempt_count(PREEMPT_ACTIVE);
4620
4621	/*
4622	* We keep the big kernel semaphore locked, but we
4623	* clear ->lock_depth so that schedule() doesnt
4624	* auto-release the semaphore:
4625	*/
4626	saved_lock_depth = task->lock_depth;
4627	task->lock_depth = -1;
4628	local_irq_enable();	4607	local_irq_enable();
4629	schedule();	4608	schedule();
4630	local_irq_disable();	4609	local_irq_disable();
4631	task->lock_depth = saved_lock_depth;
4632	sub_preempt_count(PREEMPT_ACTIVE);	4610	sub_preempt_count(PREEMPT_ACTIVE);
4633		4611
4634	/*	4612	/*
@@ -5853,8 +5831,11 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5853	spin_unlock_irqrestore(&rq->lock, flags);	5831	spin_unlock_irqrestore(&rq->lock, flags);
5854		5832
5855	/* Set the preempt count _outside_ the spinlocks! */	5833	/* Set the preempt count _outside_ the spinlocks! */
		5834	#if defined(CONFIG_PREEMPT)
		5835	task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
		5836	#else
5856	task_thread_info(idle)->preempt_count = 0;	5837	task_thread_info(idle)->preempt_count = 0;
5857		5838	#endif
5858	/*	5839	/*
5859	* The idle tasks have their own, simple scheduling class:	5840	* The idle tasks have their own, simple scheduling class:
5860	*/	5841	*/


diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index cd3e82530b03..01a3c22c1b5a 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c
@@ -11,79 +11,121 @@
11	#include <linux/semaphore.h>	11	#include <linux/semaphore.h>
12		12
13	/*	13	/*
14	* The 'big kernel semaphore'	14	* The 'big kernel lock'
15	*	15	*
16	* This mutex is taken and released recursively by lock_kernel()	16	* This spinlock is taken and released recursively by lock_kernel()
17	* and unlock_kernel(). It is transparently dropped and reacquired	17	* and unlock_kernel(). It is transparently dropped and reacquired
18	* over schedule(). It is used to protect legacy code that hasn't	18	* over schedule(). It is used to protect legacy code that hasn't
19	* been migrated to a proper locking design yet.	19	* been migrated to a proper locking design yet.
20	*	20	*
21	* Note: code locked by this semaphore will only be serialized against
22	* other code using the same locking facility. The code guarantees that
23	* the task remains on the same CPU.
24	*
25	* Don't use in new code.	21	* Don't use in new code.
26	*/	22	*/
27	static DECLARE_MUTEX(kernel_sem);	23	static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag);
		24
28		25
29	/*	26	/*
30	* Re-acquire the kernel semaphore.	27	* Acquire/release the underlying lock from the scheduler.
31	*	28	*
32	* This function is called with preemption off.	29	* This is called with preemption disabled, and should
		30	* return an error value if it cannot get the lock and
		31	* TIF_NEED_RESCHED gets set.
33	*	32	*
34	* We are executing in schedule() so the code must be extremely careful	33	* If it successfully gets the lock, it should increment
35	* about recursion, both due to the down() and due to the enabling of	34	* the preemption count like any spinlock does.
36	* preemption. schedule() will re-check the preemption flag after	35	*
37	* reacquiring the semaphore.	36	* (This works on UP too - _raw_spin_trylock will never
		37	* return false in that case)
38	*/	38	*/
39	int __lockfunc __reacquire_kernel_lock(void)	39	int __lockfunc __reacquire_kernel_lock(void)
40	{	40	{
41	struct task_struct *task = current;	41	while (!_raw_spin_trylock(&kernel_flag)) {
42	int saved_lock_depth = task->lock_depth;	42	if (test_thread_flag(TIF_NEED_RESCHED))
43		43	return -EAGAIN;
44	BUG_ON(saved_lock_depth < 0);	44	cpu_relax();
45		45	}
46	task->lock_depth = -1;
47	preempt_enable_no_resched();
48
49	down(&kernel_sem);
50
51	preempt_disable();	46	preempt_disable();
52	task->lock_depth = saved_lock_depth;
53
54	return 0;	47	return 0;
55	}	48	}
56		49
57	void __lockfunc __release_kernel_lock(void)	50	void __lockfunc __release_kernel_lock(void)
58	{	51	{
59	up(&kernel_sem);	52	_raw_spin_unlock(&kernel_flag);
		53	preempt_enable_no_resched();
60	}	54	}
61		55
62	/*	56	/*
63	* Getting the big kernel semaphore.	57	* These are the BKL spinlocks - we try to be polite about preemption.
		58	* If SMP is not on (i.e. UP preemption), this all goes away because the
		59	* _raw_spin_trylock() will always succeed.
64	*/	60	*/
65	void __lockfunc lock_kernel(void)	61	#ifdef CONFIG_PREEMPT
		62	static inline void __lock_kernel(void)
66	{	63	{
67	struct task_struct *task = current;	64	preempt_disable();
68	int depth = task->lock_depth + 1;	65	if (unlikely(!_raw_spin_trylock(&kernel_flag))) {
		66	/*
		67	* If preemption was disabled even before this
		68	* was called, there's nothing we can be polite
		69	* about - just spin.
		70	*/
		71	if (preempt_count() > 1) {
		72	_raw_spin_lock(&kernel_flag);
		73	return;
		74	}
69		75
70	if (likely(!depth))
71	/*	76	/*
72	* No recursion worries - we set up lock_depth _after_	77	* Otherwise, let's wait for the kernel lock
		78	* with preemption enabled..
73	*/	79	*/
74	down(&kernel_sem);	80	do {
		81	preempt_enable();
		82	while (spin_is_locked(&kernel_flag))
		83	cpu_relax();
		84	preempt_disable();
		85	} while (!_raw_spin_trylock(&kernel_flag));
		86	}
		87	}
75		88
76	task->lock_depth = depth;	89	#else
		90
		91	/*
		92	* Non-preemption case - just get the spinlock
		93	*/
		94	static inline void __lock_kernel(void)
		95	{
		96	_raw_spin_lock(&kernel_flag);
77	}	97	}
		98	#endif
78		99
79	void __lockfunc unlock_kernel(void)	100	static inline void __unlock_kernel(void)
80	{	101	{
81	struct task_struct *task = current;	102	/*
		103	* the BKL is not covered by lockdep, so we open-code the
		104	* unlocking sequence (and thus avoid the dep-chain ops):
		105	*/
		106	_raw_spin_unlock(&kernel_flag);
		107	preempt_enable();
		108	}
82		109
83	BUG_ON(task->lock_depth < 0);	110	/*
		111	* Getting the big kernel lock.
		112	*
		113	* This cannot happen asynchronously, so we only need to
		114	* worry about other CPUs.
		115	*/
		116	void __lockfunc lock_kernel(void)
		117	{
		118	int depth = current->lock_depth+1;
		119	if (likely(!depth))
		120	__lock_kernel();
		121	current->lock_depth = depth;
		122	}
84		123
85	if (likely(--task->lock_depth < 0))	124	void __lockfunc unlock_kernel(void)
86	up(&kernel_sem);	125	{
		126	BUG_ON(current->lock_depth < 0);
		127	if (likely(--current->lock_depth < 0))
		128	__unlock_kernel();
87	}	129	}
88		130
89	EXPORT_SYMBOL(lock_kernel);	131	EXPORT_SYMBOL(lock_kernel);