diff options
author | Steven Rostedt <srostedt@redhat.com> | 2012-05-30 11:54:53 -0400 |
---|---|---|
committer | Steven Rostedt <rostedt@goodmis.org> | 2012-05-31 23:12:22 -0400 |
commit | 5963e317b1e9d2a4511503916d8fd664bb8fa8fb (patch) | |
tree | 792e282724da9e5311f359de7809cde5ef3991a6 | |
parent | f8988175fd70874d1fb3712b1c5d3bfc6d455202 (diff) |
ftrace/x86: Do not change stacks in DEBUG when calling lockdep
When both DYNAMIC_FTRACE and LOCKDEP are set, the TRACE_IRQS_ON/OFF
will call into the lockdep code. The lockdep code can call lots of
functions that may be traced by ftrace. When ftrace is updating its
code and hits a breakpoint, the breakpoint handler will call into
lockdep. If lockdep happens to call a function that also has a breakpoint
attached, it will jump back into the breakpoint handler, resetting the
stack pointer to the top of the debug stack and corrupting the contents
currently on that stack.
The 'do_sym' call that calls do_int3() is protected by modifying the
IST table to point to a different location if another breakpoint is
hit. But the TRACE_IRQS_OFF/ON are outside that protection, and if
a breakpoint is hit from those, the stack will get corrupted, and
the kernel will crash:
[ 1013.243754] BUG: unable to handle kernel NULL pointer dereference at 0000000000000002
[ 1013.272665] IP: [<ffff880145cc0000>] 0xffff880145cbffff
[ 1013.285186] PGD 1401b2067 PUD 14324c067 PMD 0
[ 1013.298832] Oops: 0010 [#1] PREEMPT SMP
[ 1013.310600] CPU 2
[ 1013.317904] Modules linked in: ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables crc32c_intel ghash_clmulni_intel microcode usb_debug serio_raw pcspkr iTCO_wdt i2c_i801 iTCO_vendor_support e1000e nfsd nfs_acl auth_rpcgss lockd sunrpc i915 video i2c_algo_bit drm_kms_helper drm i2c_core [last unloaded: scsi_wait_scan]
[ 1013.401848]
[ 1013.407399] Pid: 112, comm: kworker/2:1 Not tainted 3.4.0+ #30
[ 1013.437943] RIP: 8eb8:[<ffff88014630a000>] [<ffff88014630a000>] 0xffff880146309fff
[ 1013.459871] RSP: ffffffff8165e919:ffff88014780f408 EFLAGS: 00010046
[ 1013.477909] RAX: 0000000000000001 RBX: ffffffff81104020 RCX: 0000000000000000
[ 1013.499458] RDX: ffff880148008ea8 RSI: ffffffff8131ef40 RDI: ffffffff82203b20
[ 1013.521612] RBP: ffffffff81005751 R08: 0000000000000000 R09: 0000000000000000
[ 1013.543121] R10: ffffffff82cdc318 R11: 0000000000000000 R12: ffff880145cc0000
[ 1013.564614] R13: ffff880148008eb8 R14: 0000000000000002 R15: ffff88014780cb40
[ 1013.586108] FS: 0000000000000000(0000) GS:ffff880148000000(0000) knlGS:0000000000000000
[ 1013.609458] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 1013.627420] CR2: 0000000000000002 CR3: 0000000141f10000 CR4: 00000000001407e0
[ 1013.649051] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1013.670724] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 1013.692376] Process kworker/2:1 (pid: 112, threadinfo ffff88013fe0e000, task ffff88014020a6a0)
[ 1013.717028] Stack:
[ 1013.724131] ffff88014780f570 ffff880145cc0000 0000400000004000 0000000000000000
[ 1013.745918] cccccccccccccccc ffff88014780cca8 ffffffff811072bb ffffffff81651627
[ 1013.767870] ffffffff8118f8a7 ffffffff811072bb ffffffff81f2b6c5 ffffffff81f11bdb
[ 1013.790021] Call Trace:
[ 1013.800701] Code: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a <e7> d7 64 81 ff ff ff ff 01 00 00 00 00 00 00 00 65 d9 64 81 ff
[ 1013.861443] RIP [<ffff88014630a000>] 0xffff880146309fff
[ 1013.884466] RSP <ffff88014780f408>
[ 1013.901507] CR2: 0000000000000002
The solution is to reuse the NMI functions that change the IDT table so that a breakpoint
hit in kernel mode keeps using the current stack instead of being reset to the top of the debug stack:
call debug_stack_set_zero
TRACE_IRQS_ON
call debug_stack_reset
If the TRACE_IRQS_ON happens to hit a breakpoint then it will keep the current stack
and not crash the box.
Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
-rw-r--r-- | arch/x86/kernel/entry_64.S | 44 |
1 files changed, 41 insertions, 3 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 320852d02026..7d65133b51be 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -191,6 +191,44 @@ ENDPROC(native_usergs_sysret64) | |||
191 | .endm | 191 | .endm |
192 | 192 | ||
193 | /* | 193 | /* |
194 | * When dynamic function tracer is enabled it will add a breakpoint | ||
195 | * to all locations that it is about to modify, sync CPUs, update | ||
196 | * all the code, sync CPUs, then remove the breakpoints. In this time | ||
197 | * if lockdep is enabled, it might jump back into the debug handler | ||
198 | * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF). | ||
199 | * | ||
200 | * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to | ||
201 | * make sure the stack pointer does not get reset back to the top | ||
202 | * of the debug stack, and instead just reuses the current stack. | ||
203 | */ | ||
204 | #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) | ||
205 | |||
206 | .macro TRACE_IRQS_OFF_DEBUG | ||
207 | call debug_stack_set_zero | ||
208 | TRACE_IRQS_OFF | ||
209 | call debug_stack_reset | ||
210 | .endm | ||
211 | |||
212 | .macro TRACE_IRQS_ON_DEBUG | ||
213 | call debug_stack_set_zero | ||
214 | TRACE_IRQS_ON | ||
215 | call debug_stack_reset | ||
216 | .endm | ||
217 | |||
218 | .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET | ||
219 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | ||
220 | jnc 1f | ||
221 | TRACE_IRQS_ON_DEBUG | ||
222 | 1: | ||
223 | .endm | ||
224 | |||
225 | #else | ||
226 | # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF | ||
227 | # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON | ||
228 | # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ | ||
229 | #endif | ||
230 | |||
231 | /* | ||
194 | * C code is not supposed to know about undefined top of stack. Every time | 232 | * C code is not supposed to know about undefined top of stack. Every time |
195 | * a C function with an pt_regs argument is called from the SYSCALL based | 233 | * a C function with an pt_regs argument is called from the SYSCALL based |
196 | * fast path FIXUP_TOP_OF_STACK is needed. | 234 | * fast path FIXUP_TOP_OF_STACK is needed. |
@@ -1098,7 +1136,7 @@ ENTRY(\sym) | |||
1098 | subq $ORIG_RAX-R15, %rsp | 1136 | subq $ORIG_RAX-R15, %rsp |
1099 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1137 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1100 | call save_paranoid | 1138 | call save_paranoid |
1101 | TRACE_IRQS_OFF | 1139 | TRACE_IRQS_OFF_DEBUG |
1102 | movq %rsp,%rdi /* pt_regs pointer */ | 1140 | movq %rsp,%rdi /* pt_regs pointer */ |
1103 | xorl %esi,%esi /* no error code */ | 1141 | xorl %esi,%esi /* no error code */ |
1104 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) | 1142 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
@@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip) | |||
1393 | ENTRY(paranoid_exit) | 1431 | ENTRY(paranoid_exit) |
1394 | DEFAULT_FRAME | 1432 | DEFAULT_FRAME |
1395 | DISABLE_INTERRUPTS(CLBR_NONE) | 1433 | DISABLE_INTERRUPTS(CLBR_NONE) |
1396 | TRACE_IRQS_OFF | 1434 | TRACE_IRQS_OFF_DEBUG |
1397 | testl %ebx,%ebx /* swapgs needed? */ | 1435 | testl %ebx,%ebx /* swapgs needed? */ |
1398 | jnz paranoid_restore | 1436 | jnz paranoid_restore |
1399 | testl $3,CS(%rsp) | 1437 | testl $3,CS(%rsp) |
@@ -1404,7 +1442,7 @@ paranoid_swapgs: | |||
1404 | RESTORE_ALL 8 | 1442 | RESTORE_ALL 8 |
1405 | jmp irq_return | 1443 | jmp irq_return |
1406 | paranoid_restore: | 1444 | paranoid_restore: |
1407 | TRACE_IRQS_IRETQ 0 | 1445 | TRACE_IRQS_IRETQ_DEBUG 0 |
1408 | RESTORE_ALL 8 | 1446 | RESTORE_ALL 8 |
1409 | jmp irq_return | 1447 | jmp irq_return |
1410 | paranoid_userspace: | 1448 | paranoid_userspace: |