diff options
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/Kconfig | 8 | ||||
-rw-r--r-- | arch/x86_64/Kconfig.debug | 4 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32entry.S | 19 | ||||
-rw-r--r-- | arch/x86_64/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86_64/kernel/entry.S | 188 | ||||
-rw-r--r-- | arch/x86_64/kernel/head64.c | 5 | ||||
-rw-r--r-- | arch/x86_64/kernel/irq.c | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/nmi.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/process.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/smpboot.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/stacktrace.c | 221 | ||||
-rw-r--r-- | arch/x86_64/kernel/traps.c | 129 | ||||
-rw-r--r-- | arch/x86_64/lib/thunk.S | 5 | ||||
-rw-r--r-- | arch/x86_64/mm/fault.c | 1 |
14 files changed, 495 insertions, 96 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index e856804c447f..28df7d88ce2c 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -24,6 +24,14 @@ config X86 | |||
24 | bool | 24 | bool |
25 | default y | 25 | default y |
26 | 26 | ||
27 | config LOCKDEP_SUPPORT | ||
28 | bool | ||
29 | default y | ||
30 | |||
31 | config STACKTRACE_SUPPORT | ||
32 | bool | ||
33 | default y | ||
34 | |||
27 | config SEMAPHORE_SLEEPERS | 35 | config SEMAPHORE_SLEEPERS |
28 | bool | 36 | bool |
29 | default y | 37 | default y |
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug index 1d92ab56c0f9..775d211a5cf9 100644 --- a/arch/x86_64/Kconfig.debug +++ b/arch/x86_64/Kconfig.debug | |||
@@ -1,5 +1,9 @@ | |||
1 | menu "Kernel hacking" | 1 | menu "Kernel hacking" |
2 | 2 | ||
3 | config TRACE_IRQFLAGS_SUPPORT | ||
4 | bool | ||
5 | default y | ||
6 | |||
3 | source "lib/Kconfig.debug" | 7 | source "lib/Kconfig.debug" |
4 | 8 | ||
5 | config DEBUG_RODATA | 9 | config DEBUG_RODATA |
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index c536fa98ea37..9b5bb413a6e9 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/thread_info.h> | 13 | #include <asm/thread_info.h> |
14 | #include <asm/segment.h> | 14 | #include <asm/segment.h> |
15 | #include <asm/vsyscall32.h> | 15 | #include <asm/vsyscall32.h> |
16 | #include <asm/irqflags.h> | ||
16 | #include <linux/linkage.h> | 17 | #include <linux/linkage.h> |
17 | 18 | ||
18 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) | 19 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) |
@@ -75,6 +76,10 @@ ENTRY(ia32_sysenter_target) | |||
75 | swapgs | 76 | swapgs |
76 | movq %gs:pda_kernelstack, %rsp | 77 | movq %gs:pda_kernelstack, %rsp |
77 | addq $(PDA_STACKOFFSET),%rsp | 78 | addq $(PDA_STACKOFFSET),%rsp |
79 | /* | ||
80 | * No need to follow this irqs on/off section: the syscall | ||
81 | * disabled irqs, here we enable it straight after entry: | ||
82 | */ | ||
78 | sti | 83 | sti |
79 | movl %ebp,%ebp /* zero extension */ | 84 | movl %ebp,%ebp /* zero extension */ |
80 | pushq $__USER32_DS | 85 | pushq $__USER32_DS |
@@ -118,6 +123,7 @@ sysenter_do_call: | |||
118 | movq %rax,RAX-ARGOFFSET(%rsp) | 123 | movq %rax,RAX-ARGOFFSET(%rsp) |
119 | GET_THREAD_INFO(%r10) | 124 | GET_THREAD_INFO(%r10) |
120 | cli | 125 | cli |
126 | TRACE_IRQS_OFF | ||
121 | testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) | 127 | testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) |
122 | jnz int_ret_from_sys_call | 128 | jnz int_ret_from_sys_call |
123 | andl $~TS_COMPAT,threadinfo_status(%r10) | 129 | andl $~TS_COMPAT,threadinfo_status(%r10) |
@@ -132,6 +138,7 @@ sysenter_do_call: | |||
132 | CFI_REGISTER rsp,rcx | 138 | CFI_REGISTER rsp,rcx |
133 | movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ | 139 | movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */ |
134 | CFI_REGISTER rip,rdx | 140 | CFI_REGISTER rip,rdx |
141 | TRACE_IRQS_ON | ||
135 | swapgs | 142 | swapgs |
136 | sti /* sti only takes effect after the next instruction */ | 143 | sti /* sti only takes effect after the next instruction */ |
137 | /* sysexit */ | 144 | /* sysexit */ |
@@ -186,6 +193,10 @@ ENTRY(ia32_cstar_target) | |||
186 | movl %esp,%r8d | 193 | movl %esp,%r8d |
187 | CFI_REGISTER rsp,r8 | 194 | CFI_REGISTER rsp,r8 |
188 | movq %gs:pda_kernelstack,%rsp | 195 | movq %gs:pda_kernelstack,%rsp |
196 | /* | ||
197 | * No need to follow this irqs on/off section: the syscall | ||
198 | * disabled irqs and here we enable it straight after entry: | ||
199 | */ | ||
189 | sti | 200 | sti |
190 | SAVE_ARGS 8,1,1 | 201 | SAVE_ARGS 8,1,1 |
191 | movl %eax,%eax /* zero extension */ | 202 | movl %eax,%eax /* zero extension */ |
@@ -220,6 +231,7 @@ cstar_do_call: | |||
220 | movq %rax,RAX-ARGOFFSET(%rsp) | 231 | movq %rax,RAX-ARGOFFSET(%rsp) |
221 | GET_THREAD_INFO(%r10) | 232 | GET_THREAD_INFO(%r10) |
222 | cli | 233 | cli |
234 | TRACE_IRQS_OFF | ||
223 | testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) | 235 | testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) |
224 | jnz int_ret_from_sys_call | 236 | jnz int_ret_from_sys_call |
225 | andl $~TS_COMPAT,threadinfo_status(%r10) | 237 | andl $~TS_COMPAT,threadinfo_status(%r10) |
@@ -228,6 +240,7 @@ cstar_do_call: | |||
228 | CFI_REGISTER rip,rcx | 240 | CFI_REGISTER rip,rcx |
229 | movl EFLAGS-ARGOFFSET(%rsp),%r11d | 241 | movl EFLAGS-ARGOFFSET(%rsp),%r11d |
230 | /*CFI_REGISTER rflags,r11*/ | 242 | /*CFI_REGISTER rflags,r11*/ |
243 | TRACE_IRQS_ON | ||
231 | movl RSP-ARGOFFSET(%rsp),%esp | 244 | movl RSP-ARGOFFSET(%rsp),%esp |
232 | CFI_RESTORE rsp | 245 | CFI_RESTORE rsp |
233 | swapgs | 246 | swapgs |
@@ -286,7 +299,11 @@ ENTRY(ia32_syscall) | |||
286 | /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ | 299 | /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ |
287 | /*CFI_REL_OFFSET cs,CS-RIP*/ | 300 | /*CFI_REL_OFFSET cs,CS-RIP*/ |
288 | CFI_REL_OFFSET rip,RIP-RIP | 301 | CFI_REL_OFFSET rip,RIP-RIP |
289 | swapgs | 302 | swapgs |
303 | /* | ||
304 | * No need to follow this irqs on/off section: the syscall | ||
305 | * disabled irqs and here we enable it straight after entry: | ||
306 | */ | ||
290 | sti | 307 | sti |
291 | movl %eax,%eax | 308 | movl %eax,%eax |
292 | pushq %rax | 309 | pushq %rax |
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 819e84ec5b64..b5aaeafc1cd3 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile | |||
@@ -10,6 +10,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ | |||
10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ | 10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ |
11 | pci-dma.o pci-nommu.o alternative.o | 11 | pci-dma.o pci-nommu.o alternative.o |
12 | 12 | ||
13 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | ||
13 | obj-$(CONFIG_X86_MCE) += mce.o | 14 | obj-$(CONFIG_X86_MCE) += mce.o |
14 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o | 15 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
15 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o | 16 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o |
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index ed92c2983606..d464dded68c0 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S | |||
@@ -41,13 +41,24 @@ | |||
41 | #include <asm/thread_info.h> | 41 | #include <asm/thread_info.h> |
42 | #include <asm/hw_irq.h> | 42 | #include <asm/hw_irq.h> |
43 | #include <asm/page.h> | 43 | #include <asm/page.h> |
44 | #include <asm/irqflags.h> | ||
44 | 45 | ||
45 | .code64 | 46 | .code64 |
46 | 47 | ||
47 | #ifndef CONFIG_PREEMPT | 48 | #ifndef CONFIG_PREEMPT |
48 | #define retint_kernel retint_restore_args | 49 | #define retint_kernel retint_restore_args |
49 | #endif | 50 | #endif |
50 | 51 | ||
52 | |||
53 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET | ||
54 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
55 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | ||
56 | jnc 1f | ||
57 | TRACE_IRQS_ON | ||
58 | 1: | ||
59 | #endif | ||
60 | .endm | ||
61 | |||
51 | /* | 62 | /* |
52 | * C code is not supposed to know about undefined top of stack. Every time | 63 | * C code is not supposed to know about undefined top of stack. Every time |
53 | * a C function with an pt_regs argument is called from the SYSCALL based | 64 | * a C function with an pt_regs argument is called from the SYSCALL based |
@@ -194,6 +205,10 @@ ENTRY(system_call) | |||
194 | swapgs | 205 | swapgs |
195 | movq %rsp,%gs:pda_oldrsp | 206 | movq %rsp,%gs:pda_oldrsp |
196 | movq %gs:pda_kernelstack,%rsp | 207 | movq %gs:pda_kernelstack,%rsp |
208 | /* | ||
209 | * No need to follow this irqs off/on section - it's straight | ||
210 | * and short: | ||
211 | */ | ||
197 | sti | 212 | sti |
198 | SAVE_ARGS 8,1 | 213 | SAVE_ARGS 8,1 |
199 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 214 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
@@ -219,10 +234,15 @@ ret_from_sys_call: | |||
219 | sysret_check: | 234 | sysret_check: |
220 | GET_THREAD_INFO(%rcx) | 235 | GET_THREAD_INFO(%rcx) |
221 | cli | 236 | cli |
237 | TRACE_IRQS_OFF | ||
222 | movl threadinfo_flags(%rcx),%edx | 238 | movl threadinfo_flags(%rcx),%edx |
223 | andl %edi,%edx | 239 | andl %edi,%edx |
224 | CFI_REMEMBER_STATE | 240 | CFI_REMEMBER_STATE |
225 | jnz sysret_careful | 241 | jnz sysret_careful |
242 | /* | ||
243 | * sysretq will re-enable interrupts: | ||
244 | */ | ||
245 | TRACE_IRQS_ON | ||
226 | movq RIP-ARGOFFSET(%rsp),%rcx | 246 | movq RIP-ARGOFFSET(%rsp),%rcx |
227 | CFI_REGISTER rip,rcx | 247 | CFI_REGISTER rip,rcx |
228 | RESTORE_ARGS 0,-ARG_SKIP,1 | 248 | RESTORE_ARGS 0,-ARG_SKIP,1 |
@@ -237,6 +257,7 @@ sysret_careful: | |||
237 | CFI_RESTORE_STATE | 257 | CFI_RESTORE_STATE |
238 | bt $TIF_NEED_RESCHED,%edx | 258 | bt $TIF_NEED_RESCHED,%edx |
239 | jnc sysret_signal | 259 | jnc sysret_signal |
260 | TRACE_IRQS_ON | ||
240 | sti | 261 | sti |
241 | pushq %rdi | 262 | pushq %rdi |
242 | CFI_ADJUST_CFA_OFFSET 8 | 263 | CFI_ADJUST_CFA_OFFSET 8 |
@@ -247,6 +268,7 @@ sysret_careful: | |||
247 | 268 | ||
248 | /* Handle a signal */ | 269 | /* Handle a signal */ |
249 | sysret_signal: | 270 | sysret_signal: |
271 | TRACE_IRQS_ON | ||
250 | sti | 272 | sti |
251 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | 273 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx |
252 | jz 1f | 274 | jz 1f |
@@ -261,6 +283,7 @@ sysret_signal: | |||
261 | /* Use IRET because user could have changed frame. This | 283 | /* Use IRET because user could have changed frame. This |
262 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 284 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
263 | cli | 285 | cli |
286 | TRACE_IRQS_OFF | ||
264 | jmp int_with_check | 287 | jmp int_with_check |
265 | 288 | ||
266 | badsys: | 289 | badsys: |
@@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call) | |||
309 | CFI_REL_OFFSET r10,R10-ARGOFFSET | 332 | CFI_REL_OFFSET r10,R10-ARGOFFSET |
310 | CFI_REL_OFFSET r11,R11-ARGOFFSET | 333 | CFI_REL_OFFSET r11,R11-ARGOFFSET |
311 | cli | 334 | cli |
335 | TRACE_IRQS_OFF | ||
312 | testl $3,CS-ARGOFFSET(%rsp) | 336 | testl $3,CS-ARGOFFSET(%rsp) |
313 | je retint_restore_args | 337 | je retint_restore_args |
314 | movl $_TIF_ALLWORK_MASK,%edi | 338 | movl $_TIF_ALLWORK_MASK,%edi |
@@ -327,6 +351,7 @@ int_with_check: | |||
327 | int_careful: | 351 | int_careful: |
328 | bt $TIF_NEED_RESCHED,%edx | 352 | bt $TIF_NEED_RESCHED,%edx |
329 | jnc int_very_careful | 353 | jnc int_very_careful |
354 | TRACE_IRQS_ON | ||
330 | sti | 355 | sti |
331 | pushq %rdi | 356 | pushq %rdi |
332 | CFI_ADJUST_CFA_OFFSET 8 | 357 | CFI_ADJUST_CFA_OFFSET 8 |
@@ -334,10 +359,12 @@ int_careful: | |||
334 | popq %rdi | 359 | popq %rdi |
335 | CFI_ADJUST_CFA_OFFSET -8 | 360 | CFI_ADJUST_CFA_OFFSET -8 |
336 | cli | 361 | cli |
362 | TRACE_IRQS_OFF | ||
337 | jmp int_with_check | 363 | jmp int_with_check |
338 | 364 | ||
339 | /* handle signals and tracing -- both require a full stack frame */ | 365 | /* handle signals and tracing -- both require a full stack frame */ |
340 | int_very_careful: | 366 | int_very_careful: |
367 | TRACE_IRQS_ON | ||
341 | sti | 368 | sti |
342 | SAVE_REST | 369 | SAVE_REST |
343 | /* Check for syscall exit trace */ | 370 | /* Check for syscall exit trace */ |
@@ -351,6 +378,7 @@ int_very_careful: | |||
351 | CFI_ADJUST_CFA_OFFSET -8 | 378 | CFI_ADJUST_CFA_OFFSET -8 |
352 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi | 379 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi |
353 | cli | 380 | cli |
381 | TRACE_IRQS_OFF | ||
354 | jmp int_restore_rest | 382 | jmp int_restore_rest |
355 | 383 | ||
356 | int_signal: | 384 | int_signal: |
@@ -363,6 +391,7 @@ int_signal: | |||
363 | int_restore_rest: | 391 | int_restore_rest: |
364 | RESTORE_REST | 392 | RESTORE_REST |
365 | cli | 393 | cli |
394 | TRACE_IRQS_OFF | ||
366 | jmp int_with_check | 395 | jmp int_with_check |
367 | CFI_ENDPROC | 396 | CFI_ENDPROC |
368 | END(int_ret_from_sys_call) | 397 | END(int_ret_from_sys_call) |
@@ -484,6 +513,10 @@ END(stub_rt_sigreturn) | |||
484 | swapgs | 513 | swapgs |
485 | 1: incl %gs:pda_irqcount # RED-PEN should check preempt count | 514 | 1: incl %gs:pda_irqcount # RED-PEN should check preempt count |
486 | cmoveq %gs:pda_irqstackptr,%rsp | 515 | cmoveq %gs:pda_irqstackptr,%rsp |
516 | /* | ||
517 | * We entered an interrupt context - irqs are off: | ||
518 | */ | ||
519 | TRACE_IRQS_OFF | ||
487 | call \func | 520 | call \func |
488 | .endm | 521 | .endm |
489 | 522 | ||
@@ -493,6 +526,7 @@ ENTRY(common_interrupt) | |||
493 | /* 0(%rsp): oldrsp-ARGOFFSET */ | 526 | /* 0(%rsp): oldrsp-ARGOFFSET */ |
494 | ret_from_intr: | 527 | ret_from_intr: |
495 | cli | 528 | cli |
529 | TRACE_IRQS_OFF | ||
496 | decl %gs:pda_irqcount | 530 | decl %gs:pda_irqcount |
497 | leaveq | 531 | leaveq |
498 | CFI_DEF_CFA_REGISTER rsp | 532 | CFI_DEF_CFA_REGISTER rsp |
@@ -515,9 +549,21 @@ retint_check: | |||
515 | CFI_REMEMBER_STATE | 549 | CFI_REMEMBER_STATE |
516 | jnz retint_careful | 550 | jnz retint_careful |
517 | retint_swapgs: | 551 | retint_swapgs: |
552 | /* | ||
553 | * The iretq could re-enable interrupts: | ||
554 | */ | ||
555 | cli | ||
556 | TRACE_IRQS_IRETQ | ||
518 | swapgs | 557 | swapgs |
558 | jmp restore_args | ||
559 | |||
519 | retint_restore_args: | 560 | retint_restore_args: |
520 | cli | 561 | cli |
562 | /* | ||
563 | * The iretq could re-enable interrupts: | ||
564 | */ | ||
565 | TRACE_IRQS_IRETQ | ||
566 | restore_args: | ||
521 | RESTORE_ARGS 0,8,0 | 567 | RESTORE_ARGS 0,8,0 |
522 | iret_label: | 568 | iret_label: |
523 | iretq | 569 | iretq |
@@ -530,6 +576,7 @@ iret_label: | |||
530 | /* running with kernel gs */ | 576 | /* running with kernel gs */ |
531 | bad_iret: | 577 | bad_iret: |
532 | movq $11,%rdi /* SIGSEGV */ | 578 | movq $11,%rdi /* SIGSEGV */ |
579 | TRACE_IRQS_ON | ||
533 | sti | 580 | sti |
534 | jmp do_exit | 581 | jmp do_exit |
535 | .previous | 582 | .previous |
@@ -539,6 +586,7 @@ retint_careful: | |||
539 | CFI_RESTORE_STATE | 586 | CFI_RESTORE_STATE |
540 | bt $TIF_NEED_RESCHED,%edx | 587 | bt $TIF_NEED_RESCHED,%edx |
541 | jnc retint_signal | 588 | jnc retint_signal |
589 | TRACE_IRQS_ON | ||
542 | sti | 590 | sti |
543 | pushq %rdi | 591 | pushq %rdi |
544 | CFI_ADJUST_CFA_OFFSET 8 | 592 | CFI_ADJUST_CFA_OFFSET 8 |
@@ -547,11 +595,13 @@ retint_careful: | |||
547 | CFI_ADJUST_CFA_OFFSET -8 | 595 | CFI_ADJUST_CFA_OFFSET -8 |
548 | GET_THREAD_INFO(%rcx) | 596 | GET_THREAD_INFO(%rcx) |
549 | cli | 597 | cli |
598 | TRACE_IRQS_OFF | ||
550 | jmp retint_check | 599 | jmp retint_check |
551 | 600 | ||
552 | retint_signal: | 601 | retint_signal: |
553 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | 602 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx |
554 | jz retint_swapgs | 603 | jz retint_swapgs |
604 | TRACE_IRQS_ON | ||
555 | sti | 605 | sti |
556 | SAVE_REST | 606 | SAVE_REST |
557 | movq $-1,ORIG_RAX(%rsp) | 607 | movq $-1,ORIG_RAX(%rsp) |
@@ -560,6 +610,7 @@ retint_signal: | |||
560 | call do_notify_resume | 610 | call do_notify_resume |
561 | RESTORE_REST | 611 | RESTORE_REST |
562 | cli | 612 | cli |
613 | TRACE_IRQS_OFF | ||
563 | movl $_TIF_NEED_RESCHED,%edi | 614 | movl $_TIF_NEED_RESCHED,%edi |
564 | GET_THREAD_INFO(%rcx) | 615 | GET_THREAD_INFO(%rcx) |
565 | jmp retint_check | 616 | jmp retint_check |
@@ -666,7 +717,7 @@ END(spurious_interrupt) | |||
666 | 717 | ||
667 | /* error code is on the stack already */ | 718 | /* error code is on the stack already */ |
668 | /* handle NMI like exceptions that can happen everywhere */ | 719 | /* handle NMI like exceptions that can happen everywhere */ |
669 | .macro paranoidentry sym, ist=0 | 720 | .macro paranoidentry sym, ist=0, irqtrace=1 |
670 | SAVE_ALL | 721 | SAVE_ALL |
671 | cld | 722 | cld |
672 | movl $1,%ebx | 723 | movl $1,%ebx |
@@ -691,8 +742,73 @@ END(spurious_interrupt) | |||
691 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | 742 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) |
692 | .endif | 743 | .endif |
693 | cli | 744 | cli |
745 | .if \irqtrace | ||
746 | TRACE_IRQS_OFF | ||
747 | .endif | ||
694 | .endm | 748 | .endm |
695 | 749 | ||
750 | /* | ||
751 | * "Paranoid" exit path from exception stack. | ||
752 | * Paranoid because this is used by NMIs and cannot take | ||
753 | * any kernel state for granted. | ||
754 | * We don't do kernel preemption checks here, because only | ||
755 | * NMI should be common and it does not enable IRQs and | ||
756 | * cannot get reschedule ticks. | ||
757 | * | ||
758 | * "trace" is 0 for the NMI handler only, because irq-tracing | ||
759 | * is fundamentally NMI-unsafe. (we cannot change the soft and | ||
760 | * hard flags at once, atomically) | ||
761 | */ | ||
762 | .macro paranoidexit trace=1 | ||
763 | /* ebx: no swapgs flag */ | ||
764 | paranoid_exit\trace: | ||
765 | testl %ebx,%ebx /* swapgs needed? */ | ||
766 | jnz paranoid_restore\trace | ||
767 | testl $3,CS(%rsp) | ||
768 | jnz paranoid_userspace\trace | ||
769 | paranoid_swapgs\trace: | ||
770 | TRACE_IRQS_IRETQ 0 | ||
771 | swapgs | ||
772 | paranoid_restore\trace: | ||
773 | RESTORE_ALL 8 | ||
774 | iretq | ||
775 | paranoid_userspace\trace: | ||
776 | GET_THREAD_INFO(%rcx) | ||
777 | movl threadinfo_flags(%rcx),%ebx | ||
778 | andl $_TIF_WORK_MASK,%ebx | ||
779 | jz paranoid_swapgs\trace | ||
780 | movq %rsp,%rdi /* &pt_regs */ | ||
781 | call sync_regs | ||
782 | movq %rax,%rsp /* switch stack for scheduling */ | ||
783 | testl $_TIF_NEED_RESCHED,%ebx | ||
784 | jnz paranoid_schedule\trace | ||
785 | movl %ebx,%edx /* arg3: thread flags */ | ||
786 | .if \trace | ||
787 | TRACE_IRQS_ON | ||
788 | .endif | ||
789 | sti | ||
790 | xorl %esi,%esi /* arg2: oldset */ | ||
791 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
792 | call do_notify_resume | ||
793 | cli | ||
794 | .if \trace | ||
795 | TRACE_IRQS_OFF | ||
796 | .endif | ||
797 | jmp paranoid_userspace\trace | ||
798 | paranoid_schedule\trace: | ||
799 | .if \trace | ||
800 | TRACE_IRQS_ON | ||
801 | .endif | ||
802 | sti | ||
803 | call schedule | ||
804 | cli | ||
805 | .if \trace | ||
806 | TRACE_IRQS_OFF | ||
807 | .endif | ||
808 | jmp paranoid_userspace\trace | ||
809 | CFI_ENDPROC | ||
810 | .endm | ||
811 | |||
696 | /* | 812 | /* |
697 | * Exception entry point. This expects an error code/orig_rax on the stack | 813 | * Exception entry point. This expects an error code/orig_rax on the stack |
698 | * and the exception handler in %rax. | 814 | * and the exception handler in %rax. |
@@ -748,6 +864,7 @@ error_exit: | |||
748 | movl %ebx,%eax | 864 | movl %ebx,%eax |
749 | RESTORE_REST | 865 | RESTORE_REST |
750 | cli | 866 | cli |
867 | TRACE_IRQS_OFF | ||
751 | GET_THREAD_INFO(%rcx) | 868 | GET_THREAD_INFO(%rcx) |
752 | testl %eax,%eax | 869 | testl %eax,%eax |
753 | jne retint_kernel | 870 | jne retint_kernel |
@@ -755,6 +872,10 @@ error_exit: | |||
755 | movl $_TIF_WORK_MASK,%edi | 872 | movl $_TIF_WORK_MASK,%edi |
756 | andl %edi,%edx | 873 | andl %edi,%edx |
757 | jnz retint_careful | 874 | jnz retint_careful |
875 | /* | ||
876 | * The iret might restore flags: | ||
877 | */ | ||
878 | TRACE_IRQS_IRETQ | ||
758 | swapgs | 879 | swapgs |
759 | RESTORE_ARGS 0,8,0 | 880 | RESTORE_ARGS 0,8,0 |
760 | jmp iret_label | 881 | jmp iret_label |
@@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug) | |||
916 | pushq $0 | 1037 | pushq $0 |
917 | CFI_ADJUST_CFA_OFFSET 8 | 1038 | CFI_ADJUST_CFA_OFFSET 8 |
918 | paranoidentry do_debug, DEBUG_STACK | 1039 | paranoidentry do_debug, DEBUG_STACK |
919 | jmp paranoid_exit | 1040 | paranoidexit |
920 | CFI_ENDPROC | ||
921 | END(debug) | 1041 | END(debug) |
922 | .previous .text | 1042 | .previous .text |
923 | 1043 | ||
@@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi) | |||
926 | INTR_FRAME | 1046 | INTR_FRAME |
927 | pushq $-1 | 1047 | pushq $-1 |
928 | CFI_ADJUST_CFA_OFFSET 8 | 1048 | CFI_ADJUST_CFA_OFFSET 8 |
929 | paranoidentry do_nmi | 1049 | paranoidentry do_nmi, 0, 0 |
930 | /* | 1050 | #ifdef CONFIG_TRACE_IRQFLAGS |
931 | * "Paranoid" exit path from exception stack. | 1051 | paranoidexit 0 |
932 | * Paranoid because this is used by NMIs and cannot take | 1052 | #else |
933 | * any kernel state for granted. | 1053 | jmp paranoid_exit1 |
934 | * We don't do kernel preemption checks here, because only | 1054 | CFI_ENDPROC |
935 | * NMI should be common and it does not enable IRQs and | 1055 | #endif |
936 | * cannot get reschedule ticks. | ||
937 | */ | ||
938 | /* ebx: no swapgs flag */ | ||
939 | paranoid_exit: | ||
940 | testl %ebx,%ebx /* swapgs needed? */ | ||
941 | jnz paranoid_restore | ||
942 | testl $3,CS(%rsp) | ||
943 | jnz paranoid_userspace | ||
944 | paranoid_swapgs: | ||
945 | swapgs | ||
946 | paranoid_restore: | ||
947 | RESTORE_ALL 8 | ||
948 | iretq | ||
949 | paranoid_userspace: | ||
950 | GET_THREAD_INFO(%rcx) | ||
951 | movl threadinfo_flags(%rcx),%ebx | ||
952 | andl $_TIF_WORK_MASK,%ebx | ||
953 | jz paranoid_swapgs | ||
954 | movq %rsp,%rdi /* &pt_regs */ | ||
955 | call sync_regs | ||
956 | movq %rax,%rsp /* switch stack for scheduling */ | ||
957 | testl $_TIF_NEED_RESCHED,%ebx | ||
958 | jnz paranoid_schedule | ||
959 | movl %ebx,%edx /* arg3: thread flags */ | ||
960 | sti | ||
961 | xorl %esi,%esi /* arg2: oldset */ | ||
962 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
963 | call do_notify_resume | ||
964 | cli | ||
965 | jmp paranoid_userspace | ||
966 | paranoid_schedule: | ||
967 | sti | ||
968 | call schedule | ||
969 | cli | ||
970 | jmp paranoid_userspace | ||
971 | CFI_ENDPROC | ||
972 | END(nmi) | 1056 | END(nmi) |
973 | .previous .text | 1057 | .previous .text |
974 | 1058 | ||
@@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3) | |||
977 | pushq $0 | 1061 | pushq $0 |
978 | CFI_ADJUST_CFA_OFFSET 8 | 1062 | CFI_ADJUST_CFA_OFFSET 8 |
979 | paranoidentry do_int3, DEBUG_STACK | 1063 | paranoidentry do_int3, DEBUG_STACK |
980 | jmp paranoid_exit | 1064 | jmp paranoid_exit1 |
981 | CFI_ENDPROC | 1065 | CFI_ENDPROC |
982 | END(int3) | 1066 | END(int3) |
983 | .previous .text | 1067 | .previous .text |
@@ -1006,7 +1090,7 @@ END(reserved) | |||
1006 | ENTRY(double_fault) | 1090 | ENTRY(double_fault) |
1007 | XCPT_FRAME | 1091 | XCPT_FRAME |
1008 | paranoidentry do_double_fault | 1092 | paranoidentry do_double_fault |
1009 | jmp paranoid_exit | 1093 | jmp paranoid_exit1 |
1010 | CFI_ENDPROC | 1094 | CFI_ENDPROC |
1011 | END(double_fault) | 1095 | END(double_fault) |
1012 | 1096 | ||
@@ -1022,7 +1106,7 @@ END(segment_not_present) | |||
1022 | ENTRY(stack_segment) | 1106 | ENTRY(stack_segment) |
1023 | XCPT_FRAME | 1107 | XCPT_FRAME |
1024 | paranoidentry do_stack_segment | 1108 | paranoidentry do_stack_segment |
1025 | jmp paranoid_exit | 1109 | jmp paranoid_exit1 |
1026 | CFI_ENDPROC | 1110 | CFI_ENDPROC |
1027 | END(stack_segment) | 1111 | END(stack_segment) |
1028 | 1112 | ||
@@ -1050,7 +1134,7 @@ ENTRY(machine_check) | |||
1050 | pushq $0 | 1134 | pushq $0 |
1051 | CFI_ADJUST_CFA_OFFSET 8 | 1135 | CFI_ADJUST_CFA_OFFSET 8 |
1052 | paranoidentry do_machine_check | 1136 | paranoidentry do_machine_check |
1053 | jmp paranoid_exit | 1137 | jmp paranoid_exit1 |
1054 | CFI_ENDPROC | 1138 | CFI_ENDPROC |
1055 | END(machine_check) | 1139 | END(machine_check) |
1056 | #endif | 1140 | #endif |
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index e6a71c9556d9..36647ce6aecb 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c | |||
@@ -85,6 +85,11 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
85 | clear_bss(); | 85 | clear_bss(); |
86 | 86 | ||
87 | /* | 87 | /* |
88 | * This must be called really, really early: | ||
89 | */ | ||
90 | lockdep_init(); | ||
91 | |||
92 | /* | ||
88 | * switch to init_level4_pgt from boot_level4_pgt | 93 | * switch to init_level4_pgt from boot_level4_pgt |
89 | */ | 94 | */ |
90 | memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); | 95 | memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); |
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index a1f1df5f7bfc..5221a53e90c1 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c | |||
@@ -177,8 +177,10 @@ asmlinkage void do_softirq(void) | |||
177 | local_irq_save(flags); | 177 | local_irq_save(flags); |
178 | pending = local_softirq_pending(); | 178 | pending = local_softirq_pending(); |
179 | /* Switch to interrupt stack */ | 179 | /* Switch to interrupt stack */ |
180 | if (pending) | 180 | if (pending) { |
181 | call_softirq(); | 181 | call_softirq(); |
182 | WARN_ON_ONCE(softirq_count()); | ||
183 | } | ||
182 | local_irq_restore(flags); | 184 | local_irq_restore(flags); |
183 | } | 185 | } |
184 | EXPORT_SYMBOL(do_softirq); | 186 | EXPORT_SYMBOL(do_softirq); |
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 476c1472fc07..5baa0c726e97 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -127,7 +127,7 @@ void __cpuinit nmi_watchdog_default(void) | |||
127 | static __init void nmi_cpu_busy(void *data) | 127 | static __init void nmi_cpu_busy(void *data) |
128 | { | 128 | { |
129 | volatile int *endflag = data; | 129 | volatile int *endflag = data; |
130 | local_irq_enable(); | 130 | local_irq_enable_in_hardirq(); |
131 | /* Intentionally don't use cpu_relax here. This is | 131 | /* Intentionally don't use cpu_relax here. This is |
132 | to make sure that the performance counter really ticks, | 132 | to make sure that the performance counter really ticks, |
133 | even if there is a simulator or similar that catches the | 133 | even if there is a simulator or similar that catches the |
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index ca56e19b8b6e..bb6745d13b8f 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c | |||
@@ -296,7 +296,7 @@ void __show_regs(struct pt_regs * regs) | |||
296 | system_utsname.version); | 296 | system_utsname.version); |
297 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); | 297 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); |
298 | printk_address(regs->rip); | 298 | printk_address(regs->rip); |
299 | printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, | 299 | printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, |
300 | regs->eflags); | 300 | regs->eflags); |
301 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", | 301 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", |
302 | regs->rax, regs->rbx, regs->rcx); | 302 | regs->rax, regs->rbx, regs->rcx); |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 9705a6a384f1..b7c705969791 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -775,6 +775,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) | |||
775 | }; | 775 | }; |
776 | DECLARE_WORK(work, do_fork_idle, &c_idle); | 776 | DECLARE_WORK(work, do_fork_idle, &c_idle); |
777 | 777 | ||
778 | lockdep_set_class(&c_idle.done.wait.lock, &waitqueue_lock_key); | ||
779 | |||
778 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ | 780 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ |
779 | if (!cpu_gdt_descr[cpu].address && | 781 | if (!cpu_gdt_descr[cpu].address && |
780 | !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { | 782 | !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { |
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c new file mode 100644 index 000000000000..32cf55eb9af8 --- /dev/null +++ b/arch/x86_64/kernel/stacktrace.c | |||
@@ -0,0 +1,221 @@ | |||
1 | /* | ||
2 | * arch/x86_64/kernel/stacktrace.c | ||
3 | * | ||
4 | * Stack trace management functions | ||
5 | * | ||
6 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
7 | */ | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/stacktrace.h> | ||
10 | |||
11 | #include <asm/smp.h> | ||
12 | |||
13 | static inline int | ||
14 | in_range(unsigned long start, unsigned long addr, unsigned long end) | ||
15 | { | ||
16 | return addr >= start && addr <= end; | ||
17 | } | ||
18 | |||
19 | static unsigned long | ||
20 | get_stack_end(struct task_struct *task, unsigned long stack) | ||
21 | { | ||
22 | unsigned long stack_start, stack_end, flags; | ||
23 | int i, cpu; | ||
24 | |||
25 | /* | ||
26 | * The most common case is that we are in the task stack: | ||
27 | */ | ||
28 | stack_start = (unsigned long)task->thread_info; | ||
29 | stack_end = stack_start + THREAD_SIZE; | ||
30 | |||
31 | if (in_range(stack_start, stack, stack_end)) | ||
32 | return stack_end; | ||
33 | |||
34 | /* | ||
35 | * We are in an interrupt if irqstackptr is set: | ||
36 | */ | ||
37 | raw_local_irq_save(flags); | ||
38 | cpu = safe_smp_processor_id(); | ||
39 | stack_end = (unsigned long)cpu_pda(cpu)->irqstackptr; | ||
40 | |||
41 | if (stack_end) { | ||
42 | stack_start = stack_end & ~(IRQSTACKSIZE-1); | ||
43 | if (in_range(stack_start, stack, stack_end)) | ||
44 | goto out_restore; | ||
45 | /* | ||
46 | * We get here if we are in an IRQ context but we | ||
47 | * are also in an exception stack. | ||
48 | */ | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * Iterate over all exception stacks, and figure out whether | ||
53 | * 'stack' is in one of them: | ||
54 | */ | ||
55 | for (i = 0; i < N_EXCEPTION_STACKS; i++) { | ||
56 | /* | ||
57 | * set 'end' to the end of the exception stack. | ||
58 | */ | ||
59 | stack_end = per_cpu(init_tss, cpu).ist[i]; | ||
60 | stack_start = stack_end - EXCEPTION_STKSZ; | ||
61 | |||
62 | /* | ||
63 | * Is 'stack' above this exception frame's end? | ||
64 | * If yes then skip to the next frame. | ||
65 | */ | ||
66 | if (stack >= stack_end) | ||
67 | continue; | ||
68 | /* | ||
69 | * Is 'stack' above this exception frame's start address? | ||
70 | * If yes then we found the right frame. | ||
71 | */ | ||
72 | if (stack >= stack_start) | ||
73 | goto out_restore; | ||
74 | |||
75 | /* | ||
76 | * If this is a debug stack, and if it has a larger size than | ||
77 | * the usual exception stacks, then 'stack' might still | ||
78 | * be within the lower portion of the debug stack: | ||
79 | */ | ||
80 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
81 | if (i == DEBUG_STACK - 1 && stack >= stack_end - DEBUG_STKSZ) { | ||
82 | /* | ||
83 | * Black magic. A large debug stack is composed of | ||
84 | * multiple exception stack entries, which we | ||
85 | * iterate through now. Don't look: | ||
86 | */ | ||
87 | do { | ||
88 | stack_end -= EXCEPTION_STKSZ; | ||
89 | stack_start -= EXCEPTION_STKSZ; | ||
90 | } while (stack < stack_start); | ||
91 | |||
92 | goto out_restore; | ||
93 | } | ||
94 | #endif | ||
95 | } | ||
96 | /* | ||
97 | * Ok, 'stack' is not pointing to any of the system stacks. | ||
98 | */ | ||
99 | stack_end = 0; | ||
100 | |||
101 | out_restore: | ||
102 | raw_local_irq_restore(flags); | ||
103 | |||
104 | return stack_end; | ||
105 | } | ||
106 | |||
107 | |||
108 | /* | ||
109 | * Save stack-backtrace addresses into a stack_trace buffer: | ||
110 | */ | ||
111 | static inline unsigned long | ||
112 | save_context_stack(struct stack_trace *trace, unsigned int skip, | ||
113 | unsigned long stack, unsigned long stack_end) | ||
114 | { | ||
115 | unsigned long addr; | ||
116 | |||
117 | #ifdef CONFIG_FRAME_POINTER | ||
118 | unsigned long prev_stack = 0; | ||
119 | |||
120 | while (in_range(prev_stack, stack, stack_end)) { | ||
121 | pr_debug("stack: %p\n", (void *)stack); | ||
122 | addr = (unsigned long)(((unsigned long *)stack)[1]); | ||
123 | pr_debug("addr: %p\n", (void *)addr); | ||
124 | if (!skip) | ||
125 | trace->entries[trace->nr_entries++] = addr-1; | ||
126 | else | ||
127 | skip--; | ||
128 | if (trace->nr_entries >= trace->max_entries) | ||
129 | break; | ||
130 | if (!addr) | ||
131 | return 0; | ||
132 | /* | ||
133 | * Stack frames must go forwards (otherwise a loop could | ||
134 | * happen if the stackframe is corrupted), so we move | ||
135 | * prev_stack forwards: | ||
136 | */ | ||
137 | prev_stack = stack; | ||
138 | stack = (unsigned long)(((unsigned long *)stack)[0]); | ||
139 | } | ||
140 | pr_debug("invalid: %p\n", (void *)stack); | ||
141 | #else | ||
142 | while (stack < stack_end) { | ||
143 | addr = ((unsigned long *)stack)[0]; | ||
144 | stack += sizeof(long); | ||
145 | if (__kernel_text_address(addr)) { | ||
146 | if (!skip) | ||
147 | trace->entries[trace->nr_entries++] = addr-1; | ||
148 | else | ||
149 | skip--; | ||
150 | if (trace->nr_entries >= trace->max_entries) | ||
151 | break; | ||
152 | } | ||
153 | } | ||
154 | #endif | ||
155 | return stack; | ||
156 | } | ||
157 | |||
158 | #define MAX_STACKS 10 | ||
159 | |||
160 | /* | ||
161 | * Save stack-backtrace addresses into a stack_trace buffer. | ||
162 | * If all_contexts is set, all contexts (hardirq, softirq and process) | ||
163 | * are saved. If not set then only the current context is saved. | ||
164 | */ | ||
165 | void save_stack_trace(struct stack_trace *trace, | ||
166 | struct task_struct *task, int all_contexts, | ||
167 | unsigned int skip) | ||
168 | { | ||
169 | unsigned long stack = (unsigned long)&stack; | ||
170 | int i, nr_stacks = 0, stacks_done[MAX_STACKS]; | ||
171 | |||
172 | WARN_ON(trace->nr_entries || !trace->max_entries); | ||
173 | |||
174 | if (!task) | ||
175 | task = current; | ||
176 | |||
177 | pr_debug("task: %p, ti: %p\n", task, task->thread_info); | ||
178 | |||
179 | if (!task || task == current) { | ||
180 | /* Grab rbp right from our regs: */ | ||
181 | asm ("mov %%rbp, %0" : "=r" (stack)); | ||
182 | pr_debug("rbp: %p\n", (void *)stack); | ||
183 | } else { | ||
184 | /* rbp is the last reg pushed by switch_to(): */ | ||
185 | stack = task->thread.rsp; | ||
186 | pr_debug("other task rsp: %p\n", (void *)stack); | ||
187 | stack = (unsigned long)(((unsigned long *)stack)[0]); | ||
188 | pr_debug("other task rbp: %p\n", (void *)stack); | ||
189 | } | ||
190 | |||
191 | while (1) { | ||
192 | unsigned long stack_end = get_stack_end(task, stack); | ||
193 | |||
194 | pr_debug("stack: %p\n", (void *)stack); | ||
195 | pr_debug("stack end: %p\n", (void *)stack_end); | ||
196 | |||
197 | /* | ||
198 | * Invalid stack address? | ||
199 | */ | ||
200 | if (!stack_end) | ||
201 | return; | ||
202 | /* | ||
203 | * Were we in this stack already? (recursion) | ||
204 | */ | ||
205 | for (i = 0; i < nr_stacks; i++) | ||
206 | if (stacks_done[i] == stack_end) | ||
207 | return; | ||
208 | stacks_done[nr_stacks] = stack_end; | ||
209 | |||
210 | stack = save_context_stack(trace, skip, stack, stack_end); | ||
211 | if (!all_contexts || !stack || | ||
212 | trace->nr_entries >= trace->max_entries) | ||
213 | return; | ||
214 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
215 | if (trace->nr_entries >= trace->max_entries) | ||
216 | return; | ||
217 | if (++nr_stacks >= MAX_STACKS) | ||
218 | return; | ||
219 | } | ||
220 | } | ||
221 | |||
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 5a5311d3de0f..79d05c482072 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c | |||
@@ -110,28 +110,31 @@ static int kstack_depth_to_print = 12; | |||
110 | static int call_trace = 1; | 110 | static int call_trace = 1; |
111 | 111 | ||
112 | #ifdef CONFIG_KALLSYMS | 112 | #ifdef CONFIG_KALLSYMS |
113 | #include <linux/kallsyms.h> | 113 | # include <linux/kallsyms.h> |
114 | int printk_address(unsigned long address) | 114 | void printk_address(unsigned long address) |
115 | { | 115 | { |
116 | unsigned long offset = 0, symsize; | 116 | unsigned long offset = 0, symsize; |
117 | const char *symname; | 117 | const char *symname; |
118 | char *modname; | 118 | char *modname; |
119 | char *delim = ":"; | 119 | char *delim = ":"; |
120 | char namebuf[128]; | 120 | char namebuf[128]; |
121 | 121 | ||
122 | symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); | 122 | symname = kallsyms_lookup(address, &symsize, &offset, |
123 | if (!symname) | 123 | &modname, namebuf); |
124 | return printk("[<%016lx>]", address); | 124 | if (!symname) { |
125 | if (!modname) | 125 | printk(" [<%016lx>]\n", address); |
126 | return; | ||
127 | } | ||
128 | if (!modname) | ||
126 | modname = delim = ""; | 129 | modname = delim = ""; |
127 | return printk("<%016lx>{%s%s%s%s%+ld}", | 130 | printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n", |
128 | address, delim, modname, delim, symname, offset); | 131 | address, delim, modname, delim, symname, offset, symsize); |
129 | } | 132 | } |
130 | #else | 133 | #else |
131 | int printk_address(unsigned long address) | 134 | void printk_address(unsigned long address) |
132 | { | 135 | { |
133 | return printk("[<%016lx>]", address); | 136 | printk(" [<%016lx>]\n", address); |
134 | } | 137 | } |
135 | #endif | 138 | #endif |
136 | 139 | ||
137 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 140 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
@@ -149,10 +152,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
149 | }; | 152 | }; |
150 | unsigned k; | 153 | unsigned k; |
151 | 154 | ||
155 | /* | ||
156 | * Iterate over all exception stacks, and figure out whether | ||
157 | * 'stack' is in one of them: | ||
158 | */ | ||
152 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { | 159 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { |
153 | unsigned long end; | 160 | unsigned long end; |
154 | 161 | ||
162 | /* | ||
163 | * set 'end' to the end of the exception stack. | ||
164 | */ | ||
155 | switch (k + 1) { | 165 | switch (k + 1) { |
166 | /* | ||
167 | * TODO: this block is not needed I think, because | ||
168 | * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK] | ||
169 | * properly too. | ||
170 | */ | ||
156 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 171 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
157 | case DEBUG_STACK: | 172 | case DEBUG_STACK: |
158 | end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; | 173 | end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; |
@@ -162,19 +177,43 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
162 | end = per_cpu(init_tss, cpu).ist[k]; | 177 | end = per_cpu(init_tss, cpu).ist[k]; |
163 | break; | 178 | break; |
164 | } | 179 | } |
180 | /* | ||
181 | * Is 'stack' above this exception frame's end? | ||
182 | * If yes then skip to the next frame. | ||
183 | */ | ||
165 | if (stack >= end) | 184 | if (stack >= end) |
166 | continue; | 185 | continue; |
186 | /* | ||
187 | * Is 'stack' above this exception frame's start address? | ||
188 | * If yes then we found the right frame. | ||
189 | */ | ||
167 | if (stack >= end - EXCEPTION_STKSZ) { | 190 | if (stack >= end - EXCEPTION_STKSZ) { |
191 | /* | ||
192 | * Make sure we only iterate through an exception | ||
193 | * stack once. If it comes up for the second time | ||
194 | * then there's something wrong going on - just | ||
195 | * break out and return NULL: | ||
196 | */ | ||
168 | if (*usedp & (1U << k)) | 197 | if (*usedp & (1U << k)) |
169 | break; | 198 | break; |
170 | *usedp |= 1U << k; | 199 | *usedp |= 1U << k; |
171 | *idp = ids[k]; | 200 | *idp = ids[k]; |
172 | return (unsigned long *)end; | 201 | return (unsigned long *)end; |
173 | } | 202 | } |
203 | /* | ||
204 | * If this is a debug stack, and if it has a larger size than | ||
205 | * the usual exception stacks, then 'stack' might still | ||
206 | * be within the lower portion of the debug stack: | ||
207 | */ | ||
174 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 208 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
175 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { | 209 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { |
176 | unsigned j = N_EXCEPTION_STACKS - 1; | 210 | unsigned j = N_EXCEPTION_STACKS - 1; |
177 | 211 | ||
212 | /* | ||
213 | * Black magic. A large debug stack is composed of | ||
214 | * multiple exception stack entries, which we | ||
215 | * iterate through now. Don't look: | ||
216 | */ | ||
178 | do { | 217 | do { |
179 | ++j; | 218 | ++j; |
180 | end -= EXCEPTION_STKSZ; | 219 | end -= EXCEPTION_STKSZ; |
@@ -193,20 +232,14 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
193 | 232 | ||
194 | static int show_trace_unwind(struct unwind_frame_info *info, void *context) | 233 | static int show_trace_unwind(struct unwind_frame_info *info, void *context) |
195 | { | 234 | { |
196 | int i = 11, n = 0; | 235 | int n = 0; |
197 | 236 | ||
198 | while (unwind(info) == 0 && UNW_PC(info)) { | 237 | while (unwind(info) == 0 && UNW_PC(info)) { |
199 | ++n; | 238 | n++; |
200 | if (i > 50) { | 239 | printk_address(UNW_PC(info)); |
201 | printk("\n "); | ||
202 | i = 7; | ||
203 | } else | ||
204 | i += printk(" "); | ||
205 | i += printk_address(UNW_PC(info)); | ||
206 | if (arch_unw_user_mode(info)) | 240 | if (arch_unw_user_mode(info)) |
207 | break; | 241 | break; |
208 | } | 242 | } |
209 | printk("\n"); | ||
210 | return n; | 243 | return n; |
211 | } | 244 | } |
212 | 245 | ||
@@ -224,7 +257,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
224 | int i = 11; | 257 | int i = 11; |
225 | unsigned used = 0; | 258 | unsigned used = 0; |
226 | 259 | ||
227 | printk("\nCall Trace:"); | 260 | printk("\nCall Trace:\n"); |
228 | 261 | ||
229 | if (!tsk) | 262 | if (!tsk) |
230 | tsk = current; | 263 | tsk = current; |
@@ -250,16 +283,15 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
250 | } | 283 | } |
251 | } | 284 | } |
252 | 285 | ||
286 | /* | ||
287 | * Print function call entries within a stack. 'cond' is the | ||
288 | * "end of stackframe" condition, that the 'stack++' | ||
289 | * iteration will eventually trigger. | ||
290 | */ | ||
253 | #define HANDLE_STACK(cond) \ | 291 | #define HANDLE_STACK(cond) \ |
254 | do while (cond) { \ | 292 | do while (cond) { \ |
255 | unsigned long addr = *stack++; \ | 293 | unsigned long addr = *stack++; \ |
256 | if (kernel_text_address(addr)) { \ | 294 | if (kernel_text_address(addr)) { \ |
257 | if (i > 50) { \ | ||
258 | printk("\n "); \ | ||
259 | i = 0; \ | ||
260 | } \ | ||
261 | else \ | ||
262 | i += printk(" "); \ | ||
263 | /* \ | 295 | /* \ |
264 | * If the address is either in the text segment of the \ | 296 | * If the address is either in the text segment of the \ |
265 | * kernel, or in the region which contains vmalloc'ed \ | 297 | * kernel, or in the region which contains vmalloc'ed \ |
@@ -268,20 +300,30 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
268 | * down the cause of the crash will be able to figure \ | 300 | * down the cause of the crash will be able to figure \ |
269 | * out the call path that was taken. \ | 301 | * out the call path that was taken. \ |
270 | */ \ | 302 | */ \ |
271 | i += printk_address(addr); \ | 303 | printk_address(addr); \ |
272 | } \ | 304 | } \ |
273 | } while (0) | 305 | } while (0) |
274 | 306 | ||
275 | for(; ; ) { | 307 | /* |
308 | * Print function call entries in all stacks, starting at the | ||
309 | * current stack address. If the stacks consist of nested | ||
310 | * exceptions, each nested stack is printed in turn. | ||
311 | */ | ||
312 | for ( ; ; ) { | ||
276 | const char *id; | 313 | const char *id; |
277 | unsigned long *estack_end; | 314 | unsigned long *estack_end; |
278 | estack_end = in_exception_stack(cpu, (unsigned long)stack, | 315 | estack_end = in_exception_stack(cpu, (unsigned long)stack, |
279 | &used, &id); | 316 | &used, &id); |
280 | 317 | ||
281 | if (estack_end) { | 318 | if (estack_end) { |
282 | i += printk(" <%s>", id); | 319 | printk(" <%s>", id); |
283 | HANDLE_STACK (stack < estack_end); | 320 | HANDLE_STACK (stack < estack_end); |
284 | i += printk(" <EOE>"); | 321 | printk(" <EOE>"); |
322 | /* | ||
323 | * We link to the next stack via the | ||
324 | * second-to-last pointer (index -2 to end) in the | ||
325 | * exception stack: | ||
326 | */ | ||
285 | stack = (unsigned long *) estack_end[-2]; | 327 | stack = (unsigned long *) estack_end[-2]; |
286 | continue; | 328 | continue; |
287 | } | 329 | } |
@@ -291,19 +333,28 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
291 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); | 333 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); |
292 | 334 | ||
293 | if (stack >= irqstack && stack < irqstack_end) { | 335 | if (stack >= irqstack && stack < irqstack_end) { |
294 | i += printk(" <IRQ>"); | 336 | printk(" <IRQ>"); |
295 | HANDLE_STACK (stack < irqstack_end); | 337 | HANDLE_STACK (stack < irqstack_end); |
338 | /* | ||
339 | * We link to the next stack (which would be | ||
340 | * the process stack normally) via the last | ||
341 | * pointer (index -1 to end) in the IRQ stack: | ||
342 | */ | ||
296 | stack = (unsigned long *) (irqstack_end[-1]); | 343 | stack = (unsigned long *) (irqstack_end[-1]); |
297 | irqstack_end = NULL; | 344 | irqstack_end = NULL; |
298 | i += printk(" <EOI>"); | 345 | printk(" <EOI>"); |
299 | continue; | 346 | continue; |
300 | } | 347 | } |
301 | } | 348 | } |
302 | break; | 349 | break; |
303 | } | 350 | } |
304 | 351 | ||
352 | /* | ||
353 | * This prints the process stack: | ||
354 | */ | ||
305 | HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); | 355 | HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); |
306 | #undef HANDLE_STACK | 356 | #undef HANDLE_STACK |
357 | |||
307 | printk("\n"); | 358 | printk("\n"); |
308 | } | 359 | } |
309 | 360 | ||
@@ -337,8 +388,8 @@ static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned | |||
337 | break; | 388 | break; |
338 | } | 389 | } |
339 | if (i && ((i % 4) == 0)) | 390 | if (i && ((i % 4) == 0)) |
340 | printk("\n "); | 391 | printk("\n"); |
341 | printk("%016lx ", *stack++); | 392 | printk(" %016lx", *stack++); |
342 | touch_nmi_watchdog(); | 393 | touch_nmi_watchdog(); |
343 | } | 394 | } |
344 | show_trace(tsk, regs, rsp); | 395 | show_trace(tsk, regs, rsp); |
diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S index e49af0032e94..332ea5dff916 100644 --- a/arch/x86_64/lib/thunk.S +++ b/arch/x86_64/lib/thunk.S | |||
@@ -47,6 +47,11 @@ | |||
47 | thunk_retrax __down_failed_interruptible,__down_interruptible | 47 | thunk_retrax __down_failed_interruptible,__down_interruptible |
48 | thunk_retrax __down_failed_trylock,__down_trylock | 48 | thunk_retrax __down_failed_trylock,__down_trylock |
49 | thunk __up_wakeup,__up | 49 | thunk __up_wakeup,__up |
50 | |||
51 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
52 | thunk trace_hardirqs_on_thunk,trace_hardirqs_on | ||
53 | thunk trace_hardirqs_off_thunk,trace_hardirqs_off | ||
54 | #endif | ||
50 | 55 | ||
51 | /* SAVE_ARGS below is used only for the .cfi directives it contains. */ | 56 | /* SAVE_ARGS below is used only for the .cfi directives it contains. */ |
52 | CFI_STARTPROC | 57 | CFI_STARTPROC |
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 5afcf6eb00fa..ac8ea66ccb94 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c | |||
@@ -570,7 +570,6 @@ no_context: | |||
570 | printk(KERN_ALERT "Unable to handle kernel paging request"); | 570 | printk(KERN_ALERT "Unable to handle kernel paging request"); |
571 | printk(" at %016lx RIP: \n" KERN_ALERT,address); | 571 | printk(" at %016lx RIP: \n" KERN_ALERT,address); |
572 | printk_address(regs->rip); | 572 | printk_address(regs->rip); |
573 | printk("\n"); | ||
574 | dump_pagetable(address); | 573 | dump_pagetable(address); |
575 | tsk->thread.cr2 = address; | 574 | tsk->thread.cr2 = address; |
576 | tsk->thread.trap_no = 14; | 575 | tsk->thread.trap_no = 14; |