aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Ellerman <michael@ellerman.id.au>2013-06-13 07:04:56 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2013-06-14 22:21:57 -0400
commit0e37739b1c96d65e6433998454985de994383019 (patch)
treea7658efaae346556e362a68b059f46ac8eed31fb
parent34376a50fb1fa095b9d0636fa41ed2e73125f214 (diff)
powerpc: Fix stack overflow crash in resume_kernel when ftracing
It's possible for us to crash when running with ftrace enabled, eg: Bad kernel stack pointer bffffd12 at c00000000000a454 cpu 0x3: Vector: 300 (Data Access) at [c00000000ffe3d40] pc: c00000000000a454: resume_kernel+0x34/0x60 lr: c00000000000335c: performance_monitor_common+0x15c/0x180 sp: bffffd12 msr: 8000000000001032 dar: bffffd12 dsisr: 42000000 If we look at current's stack (paca->__current->stack) we see it is equal to c0000002ecab0000. Our stack is 16K, and comparing to paca->kstack (c0000002ecab3e30) we can see that we have overflowed our kernel stack. This leads to us writing over our struct thread_info, and in this case we have corrupted thread_info->flags and set _TIF_EMULATE_STACK_STORE. Dumping the stack we see: 3:mon> t c0000002ecab0000 [c0000002ecab0000] c00000000002131c .performance_monitor_exception+0x5c/0x70 [c0000002ecab0080] c00000000000335c performance_monitor_common+0x15c/0x180 --- Exception: f01 (Performance Monitor) at c0000000000fb2ec .trace_hardirqs_off+0x1c/0x30 [c0000002ecab0370] c00000000016fdb0 .trace_graph_entry+0xb0/0x280 (unreliable) [c0000002ecab0410] c00000000003d038 .prepare_ftrace_return+0x98/0x130 [c0000002ecab04b0] c00000000000a920 .ftrace_graph_caller+0x14/0x28 [c0000002ecab0520] c0000000000d6b58 .idle_cpu+0x18/0x90 [c0000002ecab05a0] c00000000000a934 .return_to_handler+0x0/0x34 [c0000002ecab0620] c00000000001e660 .timer_interrupt+0x160/0x300 [c0000002ecab06d0] c0000000000025dc decrementer_common+0x15c/0x180 --- Exception: 901 (Decrementer) at c0000000000104d4 .arch_local_irq_restore+0x74/0xa0 [c0000002ecab09c0] c0000000000fe044 .trace_hardirqs_on+0x14/0x30 (unreliable) [c0000002ecab0fb0] c00000000016fe3c .trace_graph_entry+0x13c/0x280 [c0000002ecab1050] c00000000003d038 .prepare_ftrace_return+0x98/0x130 [c0000002ecab10f0] c00000000000a920 .ftrace_graph_caller+0x14/0x28 [c0000002ecab1160] c0000000000161f0 .__ppc64_runlatch_on+0x10/0x40 [c0000002ecab11d0] c00000000000a934 .return_to_handler+0x0/0x34 --- Exception: 901 (Decrementer) at c0000000000104d4 .arch_local_irq_restore+0x74/0xa0 ... and so on __ppc64_runlatch_on() is called from RUNLATCH_ON in the exception entry path. At that point the irq state is not consistent, ie. interrupts are hard disabled (by the exception entry), but the paca soft-enabled flag may be out of sync. This leads to the local_irq_restore() in trace_graph_entry() actually enabling interrupts, which we do not want. Because we have not yet reprogrammed the decrementer we immediately take another decrementer exception, and recurse. The fix is twofold. Firstly make sure we call DISABLE_INTS before calling RUNLATCH_ON. The badly named DISABLE_INTS actually reconciles the irq state in the paca with the hardware, making it safe again to call local_irq_save/restore(). Although that should be sufficient to fix the bug, we also mark the runlatch routines as notrace. They are called very early in the exception entry and we are asking for trouble tracing them. They are also fairly uninteresting and tracing them just adds unnecessary overhead. [ This regression was introduced by fe1952fc0afb9a2e4c79f103c08aef5d13db1873 "powerpc: Rework runlatch code" by myself --BenH ] CC: <stable@vger.kernel.org> [v3.4+] Signed-off-by: Michael Ellerman <michael@ellerman.id.au> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/include/asm/exception-64s.h2
-rw-r--r--arch/powerpc/kernel/process.c4
2 files changed, 3 insertions, 3 deletions
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 8e5fae8beaf6..46793b58a761 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -513,7 +513,7 @@ label##_common: \
513 */ 513 */
514#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ 514#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \
515 EXCEPTION_COMMON(trap, label, hdlr, ret_from_except_lite, \ 515 EXCEPTION_COMMON(trap, label, hdlr, ret_from_except_lite, \
516 FINISH_NAP;RUNLATCH_ON;DISABLE_INTS) 516 FINISH_NAP;DISABLE_INTS;RUNLATCH_ON)
517 517
518/* 518/*
519 * When the idle code in power4_idle puts the CPU into NAP mode, 519 * When the idle code in power4_idle puts the CPU into NAP mode,
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index b0f3e3f77e72..076d1242507a 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1369,7 +1369,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
1369 1369
1370#ifdef CONFIG_PPC64 1370#ifdef CONFIG_PPC64
1371/* Called with hard IRQs off */ 1371/* Called with hard IRQs off */
1372void __ppc64_runlatch_on(void) 1372void notrace __ppc64_runlatch_on(void)
1373{ 1373{
1374 struct thread_info *ti = current_thread_info(); 1374 struct thread_info *ti = current_thread_info();
1375 unsigned long ctrl; 1375 unsigned long ctrl;
@@ -1382,7 +1382,7 @@ void __ppc64_runlatch_on(void)
1382} 1382}
1383 1383
1384/* Called with hard IRQs off */ 1384/* Called with hard IRQs off */
1385void __ppc64_runlatch_off(void) 1385void notrace __ppc64_runlatch_off(void)
1386{ 1386{
1387 struct thread_info *ti = current_thread_info(); 1387 struct thread_info *ti = current_thread_info();
1388 unsigned long ctrl; 1388 unsigned long ctrl;