aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Steven Rostedt <srostedt@redhat.com> 2012-05-30 11:54:53 -0400
committer Steven Rostedt <rostedt@goodmis.org> 2012-05-31 23:12:22 -0400
commit 5963e317b1e9d2a4511503916d8fd664bb8fa8fb (patch)
tree 792e282724da9e5311f359de7809cde5ef3991a6
parent f8988175fd70874d1fb3712b1c5d3bfc6d455202 (diff)
ftrace/x86: Do not change stacks in DEBUG when calling lockdep
When both DYNAMIC_FTRACE and LOCKDEP are set, the TRACE_IRQS_ON/OFF macros will call into the lockdep code. The lockdep code can call lots of functions that may be traced by ftrace. When ftrace is updating its code and hits a breakpoint, the breakpoint handler will call into lockdep. If lockdep happens to call a function that also has a breakpoint attached, it will jump back into the breakpoint handler, resetting the stack to the debug stack and corrupting the contents currently on that stack.

The 'do_sym' call that calls do_int3() is protected by modifying the IST table to point to a different location if another breakpoint is hit. But the TRACE_IRQS_OFF/ON calls are outside that protection, and if a breakpoint is hit from those, the stack will get corrupted, and the kernel will crash:

[ 1013.243754] BUG: unable to handle kernel NULL pointer dereference at 0000000000000002
[ 1013.272665] IP: [<ffff880145cc0000>] 0xffff880145cbffff
[ 1013.285186] PGD 1401b2067 PUD 14324c067 PMD 0
[ 1013.298832] Oops: 0010 [#1] PREEMPT SMP
[ 1013.310600] CPU 2
[ 1013.317904] Modules linked in: ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables crc32c_intel ghash_clmulni_intel microcode usb_debug serio_raw pcspkr iTCO_wdt i2c_i801 iTCO_vendor_support e1000e nfsd nfs_acl auth_rpcgss lockd sunrpc i915 video i2c_algo_bit drm_kms_helper drm i2c_core [last unloaded: scsi_wait_scan]
[ 1013.401848]
[ 1013.407399] Pid: 112, comm: kworker/2:1 Not tainted 3.4.0+ #30
[ 1013.437943] RIP: 8eb8:[<ffff88014630a000>] [<ffff88014630a000>] 0xffff880146309fff
[ 1013.459871] RSP: ffffffff8165e919:ffff88014780f408 EFLAGS: 00010046
[ 1013.477909] RAX: 0000000000000001 RBX: ffffffff81104020 RCX: 0000000000000000
[ 1013.499458] RDX: ffff880148008ea8 RSI: ffffffff8131ef40 RDI: ffffffff82203b20
[ 1013.521612] RBP: ffffffff81005751 R08: 0000000000000000 R09: 0000000000000000
[ 1013.543121] R10: ffffffff82cdc318 R11: 0000000000000000 R12: ffff880145cc0000
[ 1013.564614] R13: ffff880148008eb8 R14: 0000000000000002 R15: ffff88014780cb40
[ 1013.586108] FS: 0000000000000000(0000) GS:ffff880148000000(0000) knlGS:0000000000000000
[ 1013.609458] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 1013.627420] CR2: 0000000000000002 CR3: 0000000141f10000 CR4: 00000000001407e0
[ 1013.649051] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1013.670724] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 1013.692376] Process kworker/2:1 (pid: 112, threadinfo ffff88013fe0e000, task ffff88014020a6a0)
[ 1013.717028] Stack:
[ 1013.724131] ffff88014780f570 ffff880145cc0000 0000400000004000 0000000000000000
[ 1013.745918] cccccccccccccccc ffff88014780cca8 ffffffff811072bb ffffffff81651627
[ 1013.767870] ffffffff8118f8a7 ffffffff811072bb ffffffff81f2b6c5 ffffffff81f11bdb
[ 1013.790021] Call Trace:
[ 1013.800701] Code: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a <e7> d7 64 81 ff ff ff ff 01 00 00 00 00 00 00 00 65 d9 64 81 ff
[ 1013.861443] RIP [<ffff88014630a000>] 0xffff880146309fff
[ 1013.884466] RSP <ffff88014780f408>
[ 1013.901507] CR2: 0000000000000002

The solution was to reuse the NMI functions that change the IDT table to make the debug stack keep its current stack (in kernel mode) when hitting a breakpoint:

    call debug_stack_set_zero
    TRACE_IRQS_ON
    call debug_stack_reset

If the TRACE_IRQS_ON happens to hit a breakpoint, then it will keep the current stack and not crash the box.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
-rw-r--r-- arch/x86/kernel/entry_64.S 44
1 files changed, 41 insertions, 3 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 320852d02026..7d65133b51be 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -191,6 +191,44 @@ ENDPROC(native_usergs_sysret64)
191.endm 191.endm
192 192
193/* 193/*
194 * When the dynamic function tracer is enabled, it adds a breakpoint
195 * to all locations that it is about to modify, syncs CPUs, updates
196 * all the code, syncs CPUs, then removes the breakpoints. During this
197 * time, if lockdep is enabled, it might jump back into the debug handler
198 * outside the region protected by the IST update (i.e. from TRACE_IRQS_ON/OFF).
199 *
200 * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
201 * make sure the stack pointer does not get reset back to the top
202 * of the debug stack, and instead just reuses the current stack.
203 */
204#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
205
/*
 * TRACE_IRQS_OFF_DEBUG: invoke TRACE_IRQS_OFF bracketed by calls to
 * debug_stack_set_zero and debug_stack_reset, so that a breakpoint hit
 * while inside the tracing hook keeps using the current stack instead of
 * being reset to the top of the debug stack.
 */
206.macro TRACE_IRQS_OFF_DEBUG
207 call debug_stack_set_zero
208 TRACE_IRQS_OFF
209 call debug_stack_reset
210.endm
211
/*
 * TRACE_IRQS_ON_DEBUG: invoke TRACE_IRQS_ON bracketed by calls to
 * debug_stack_set_zero and debug_stack_reset, so that a breakpoint hit
 * while inside the tracing hook keeps using the current stack instead of
 * being reset to the top of the debug stack.
 */
212.macro TRACE_IRQS_ON_DEBUG
213 call debug_stack_set_zero
214 TRACE_IRQS_ON
215 call debug_stack_reset
216.endm
217
/*
 * TRACE_IRQS_IRETQ_DEBUG: conditionally invoke TRACE_IRQS_ON_DEBUG based
 * on the EFLAGS value stored on the stack.
 *
 * offset: subtracted from EFLAGS to locate the stored flags relative to
 *         %rsp; defaults to ARGOFFSET.
 *
 * Bit 9 of the stored EFLAGS is tested; when it is clear the trace hook is
 * skipped, otherwise TRACE_IRQS_ON_DEBUG runs.
 */
218.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
219 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
220 jnc 1f /* bit clear: skip the trace hook */
221 TRACE_IRQS_ON_DEBUG
2221:
223.endm
224
225#else
/*
 * Unless both CONFIG_DYNAMIC_FTRACE and CONFIG_TRACE_IRQFLAGS are defined,
 * the _DEBUG variants are plain aliases for the regular macros.
 */
226# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
227# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
228# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
229#endif
230
231/*
194 * C code is not supposed to know about undefined top of stack. Every time 232 * C code is not supposed to know about undefined top of stack. Every time
195 * a C function with an pt_regs argument is called from the SYSCALL based 233 * a C function with an pt_regs argument is called from the SYSCALL based
196 * fast path FIXUP_TOP_OF_STACK is needed. 234 * fast path FIXUP_TOP_OF_STACK is needed.
@@ -1098,7 +1136,7 @@ ENTRY(\sym)
1098 subq $ORIG_RAX-R15, %rsp 1136 subq $ORIG_RAX-R15, %rsp
1099 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 1137 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1100 call save_paranoid 1138 call save_paranoid
1101 TRACE_IRQS_OFF 1139 TRACE_IRQS_OFF_DEBUG
1102 movq %rsp,%rdi /* pt_regs pointer */ 1140 movq %rsp,%rdi /* pt_regs pointer */
1103 xorl %esi,%esi /* no error code */ 1141 xorl %esi,%esi /* no error code */
1104 subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) 1142 subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
@@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip)
1393ENTRY(paranoid_exit) 1431ENTRY(paranoid_exit)
1394 DEFAULT_FRAME 1432 DEFAULT_FRAME
1395 DISABLE_INTERRUPTS(CLBR_NONE) 1433 DISABLE_INTERRUPTS(CLBR_NONE)
1396 TRACE_IRQS_OFF 1434 TRACE_IRQS_OFF_DEBUG
1397 testl %ebx,%ebx /* swapgs needed? */ 1435 testl %ebx,%ebx /* swapgs needed? */
1398 jnz paranoid_restore 1436 jnz paranoid_restore
1399 testl $3,CS(%rsp) 1437 testl $3,CS(%rsp)
@@ -1404,7 +1442,7 @@ paranoid_swapgs:
1404 RESTORE_ALL 8 1442 RESTORE_ALL 8
1405 jmp irq_return 1443 jmp irq_return
1406paranoid_restore: 1444paranoid_restore:
1407 TRACE_IRQS_IRETQ 0 1445 TRACE_IRQS_IRETQ_DEBUG 0
1408 RESTORE_ALL 8 1446 RESTORE_ALL 8
1409 jmp irq_return 1447 jmp irq_return
1410paranoid_userspace: 1448paranoid_userspace: