aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r--arch/x86_64/kernel/Makefile1
-rw-r--r--arch/x86_64/kernel/entry.S188
-rw-r--r--arch/x86_64/kernel/head64.c5
-rw-r--r--arch/x86_64/kernel/irq.c4
-rw-r--r--arch/x86_64/kernel/nmi.c2
-rw-r--r--arch/x86_64/kernel/process.c2
-rw-r--r--arch/x86_64/kernel/smpboot.c2
-rw-r--r--arch/x86_64/kernel/stacktrace.c221
-rw-r--r--arch/x86_64/kernel/traps.c129
9 files changed, 460 insertions, 94 deletions
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 819e84ec5b64..b5aaeafc1cd3 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ 10 setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
11 pci-dma.o pci-nommu.o alternative.o 11 pci-dma.o pci-nommu.o alternative.o
12 12
13obj-$(CONFIG_STACKTRACE) += stacktrace.o
13obj-$(CONFIG_X86_MCE) += mce.o 14obj-$(CONFIG_X86_MCE) += mce.o
14obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 15obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
15obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o 16obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed92c2983606..d464dded68c0 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -41,13 +41,24 @@
41#include <asm/thread_info.h> 41#include <asm/thread_info.h>
42#include <asm/hw_irq.h> 42#include <asm/hw_irq.h>
43#include <asm/page.h> 43#include <asm/page.h>
44#include <asm/irqflags.h>
44 45
45 .code64 46 .code64
46 47
47#ifndef CONFIG_PREEMPT 48#ifndef CONFIG_PREEMPT
48#define retint_kernel retint_restore_args 49#define retint_kernel retint_restore_args
49#endif 50#endif
50 51
52
53.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
54#ifdef CONFIG_TRACE_IRQFLAGS
55 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
56 jnc 1f
57 TRACE_IRQS_ON
581:
59#endif
60.endm
61
51/* 62/*
52 * C code is not supposed to know about undefined top of stack. Every time 63 * C code is not supposed to know about undefined top of stack. Every time
53 * a C function with an pt_regs argument is called from the SYSCALL based 64 * a C function with an pt_regs argument is called from the SYSCALL based
@@ -194,6 +205,10 @@ ENTRY(system_call)
194 swapgs 205 swapgs
195 movq %rsp,%gs:pda_oldrsp 206 movq %rsp,%gs:pda_oldrsp
196 movq %gs:pda_kernelstack,%rsp 207 movq %gs:pda_kernelstack,%rsp
208 /*
209 * No need to follow this irqs off/on section - it's straight
210 * and short:
211 */
197 sti 212 sti
198 SAVE_ARGS 8,1 213 SAVE_ARGS 8,1
199 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) 214 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
@@ -219,10 +234,15 @@ ret_from_sys_call:
219sysret_check: 234sysret_check:
220 GET_THREAD_INFO(%rcx) 235 GET_THREAD_INFO(%rcx)
221 cli 236 cli
237 TRACE_IRQS_OFF
222 movl threadinfo_flags(%rcx),%edx 238 movl threadinfo_flags(%rcx),%edx
223 andl %edi,%edx 239 andl %edi,%edx
224 CFI_REMEMBER_STATE 240 CFI_REMEMBER_STATE
225 jnz sysret_careful 241 jnz sysret_careful
242 /*
243 * sysretq will re-enable interrupts:
244 */
245 TRACE_IRQS_ON
226 movq RIP-ARGOFFSET(%rsp),%rcx 246 movq RIP-ARGOFFSET(%rsp),%rcx
227 CFI_REGISTER rip,rcx 247 CFI_REGISTER rip,rcx
228 RESTORE_ARGS 0,-ARG_SKIP,1 248 RESTORE_ARGS 0,-ARG_SKIP,1
@@ -237,6 +257,7 @@ sysret_careful:
237 CFI_RESTORE_STATE 257 CFI_RESTORE_STATE
238 bt $TIF_NEED_RESCHED,%edx 258 bt $TIF_NEED_RESCHED,%edx
239 jnc sysret_signal 259 jnc sysret_signal
260 TRACE_IRQS_ON
240 sti 261 sti
241 pushq %rdi 262 pushq %rdi
242 CFI_ADJUST_CFA_OFFSET 8 263 CFI_ADJUST_CFA_OFFSET 8
@@ -247,6 +268,7 @@ sysret_careful:
247 268
248 /* Handle a signal */ 269 /* Handle a signal */
249sysret_signal: 270sysret_signal:
271 TRACE_IRQS_ON
250 sti 272 sti
251 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx 273 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
252 jz 1f 274 jz 1f
@@ -261,6 +283,7 @@ sysret_signal:
261 /* Use IRET because user could have changed frame. This 283 /* Use IRET because user could have changed frame. This
262 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ 284 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
263 cli 285 cli
286 TRACE_IRQS_OFF
264 jmp int_with_check 287 jmp int_with_check
265 288
266badsys: 289badsys:
@@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call)
309 CFI_REL_OFFSET r10,R10-ARGOFFSET 332 CFI_REL_OFFSET r10,R10-ARGOFFSET
310 CFI_REL_OFFSET r11,R11-ARGOFFSET 333 CFI_REL_OFFSET r11,R11-ARGOFFSET
311 cli 334 cli
335 TRACE_IRQS_OFF
312 testl $3,CS-ARGOFFSET(%rsp) 336 testl $3,CS-ARGOFFSET(%rsp)
313 je retint_restore_args 337 je retint_restore_args
314 movl $_TIF_ALLWORK_MASK,%edi 338 movl $_TIF_ALLWORK_MASK,%edi
@@ -327,6 +351,7 @@ int_with_check:
327int_careful: 351int_careful:
328 bt $TIF_NEED_RESCHED,%edx 352 bt $TIF_NEED_RESCHED,%edx
329 jnc int_very_careful 353 jnc int_very_careful
354 TRACE_IRQS_ON
330 sti 355 sti
331 pushq %rdi 356 pushq %rdi
332 CFI_ADJUST_CFA_OFFSET 8 357 CFI_ADJUST_CFA_OFFSET 8
@@ -334,10 +359,12 @@ int_careful:
334 popq %rdi 359 popq %rdi
335 CFI_ADJUST_CFA_OFFSET -8 360 CFI_ADJUST_CFA_OFFSET -8
336 cli 361 cli
362 TRACE_IRQS_OFF
337 jmp int_with_check 363 jmp int_with_check
338 364
339 /* handle signals and tracing -- both require a full stack frame */ 365 /* handle signals and tracing -- both require a full stack frame */
340int_very_careful: 366int_very_careful:
367 TRACE_IRQS_ON
341 sti 368 sti
342 SAVE_REST 369 SAVE_REST
343 /* Check for syscall exit trace */ 370 /* Check for syscall exit trace */
@@ -351,6 +378,7 @@ int_very_careful:
351 CFI_ADJUST_CFA_OFFSET -8 378 CFI_ADJUST_CFA_OFFSET -8
352 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi 379 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
353 cli 380 cli
381 TRACE_IRQS_OFF
354 jmp int_restore_rest 382 jmp int_restore_rest
355 383
356int_signal: 384int_signal:
@@ -363,6 +391,7 @@ int_signal:
363int_restore_rest: 391int_restore_rest:
364 RESTORE_REST 392 RESTORE_REST
365 cli 393 cli
394 TRACE_IRQS_OFF
366 jmp int_with_check 395 jmp int_with_check
367 CFI_ENDPROC 396 CFI_ENDPROC
368END(int_ret_from_sys_call) 397END(int_ret_from_sys_call)
@@ -484,6 +513,10 @@ END(stub_rt_sigreturn)
484 swapgs 513 swapgs
4851: incl %gs:pda_irqcount # RED-PEN should check preempt count 5141: incl %gs:pda_irqcount # RED-PEN should check preempt count
486 cmoveq %gs:pda_irqstackptr,%rsp 515 cmoveq %gs:pda_irqstackptr,%rsp
516 /*
517 * We entered an interrupt context - irqs are off:
518 */
519 TRACE_IRQS_OFF
487 call \func 520 call \func
488 .endm 521 .endm
489 522
@@ -493,6 +526,7 @@ ENTRY(common_interrupt)
493 /* 0(%rsp): oldrsp-ARGOFFSET */ 526 /* 0(%rsp): oldrsp-ARGOFFSET */
494ret_from_intr: 527ret_from_intr:
495 cli 528 cli
529 TRACE_IRQS_OFF
496 decl %gs:pda_irqcount 530 decl %gs:pda_irqcount
497 leaveq 531 leaveq
498 CFI_DEF_CFA_REGISTER rsp 532 CFI_DEF_CFA_REGISTER rsp
@@ -515,9 +549,21 @@ retint_check:
515 CFI_REMEMBER_STATE 549 CFI_REMEMBER_STATE
516 jnz retint_careful 550 jnz retint_careful
517retint_swapgs: 551retint_swapgs:
552 /*
553 * The iretq could re-enable interrupts:
554 */
555 cli
556 TRACE_IRQS_IRETQ
518 swapgs 557 swapgs
558 jmp restore_args
559
519retint_restore_args: 560retint_restore_args:
520 cli 561 cli
562 /*
563 * The iretq could re-enable interrupts:
564 */
565 TRACE_IRQS_IRETQ
566restore_args:
521 RESTORE_ARGS 0,8,0 567 RESTORE_ARGS 0,8,0
522iret_label: 568iret_label:
523 iretq 569 iretq
@@ -530,6 +576,7 @@ iret_label:
530 /* running with kernel gs */ 576 /* running with kernel gs */
531bad_iret: 577bad_iret:
532 movq $11,%rdi /* SIGSEGV */ 578 movq $11,%rdi /* SIGSEGV */
579 TRACE_IRQS_ON
533 sti 580 sti
534 jmp do_exit 581 jmp do_exit
535 .previous 582 .previous
@@ -539,6 +586,7 @@ retint_careful:
539 CFI_RESTORE_STATE 586 CFI_RESTORE_STATE
540 bt $TIF_NEED_RESCHED,%edx 587 bt $TIF_NEED_RESCHED,%edx
541 jnc retint_signal 588 jnc retint_signal
589 TRACE_IRQS_ON
542 sti 590 sti
543 pushq %rdi 591 pushq %rdi
544 CFI_ADJUST_CFA_OFFSET 8 592 CFI_ADJUST_CFA_OFFSET 8
@@ -547,11 +595,13 @@ retint_careful:
547 CFI_ADJUST_CFA_OFFSET -8 595 CFI_ADJUST_CFA_OFFSET -8
548 GET_THREAD_INFO(%rcx) 596 GET_THREAD_INFO(%rcx)
549 cli 597 cli
598 TRACE_IRQS_OFF
550 jmp retint_check 599 jmp retint_check
551 600
552retint_signal: 601retint_signal:
553 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx 602 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
554 jz retint_swapgs 603 jz retint_swapgs
604 TRACE_IRQS_ON
555 sti 605 sti
556 SAVE_REST 606 SAVE_REST
557 movq $-1,ORIG_RAX(%rsp) 607 movq $-1,ORIG_RAX(%rsp)
@@ -560,6 +610,7 @@ retint_signal:
560 call do_notify_resume 610 call do_notify_resume
561 RESTORE_REST 611 RESTORE_REST
562 cli 612 cli
613 TRACE_IRQS_OFF
563 movl $_TIF_NEED_RESCHED,%edi 614 movl $_TIF_NEED_RESCHED,%edi
564 GET_THREAD_INFO(%rcx) 615 GET_THREAD_INFO(%rcx)
565 jmp retint_check 616 jmp retint_check
@@ -666,7 +717,7 @@ END(spurious_interrupt)
666 717
667 /* error code is on the stack already */ 718 /* error code is on the stack already */
668 /* handle NMI like exceptions that can happen everywhere */ 719 /* handle NMI like exceptions that can happen everywhere */
669 .macro paranoidentry sym, ist=0 720 .macro paranoidentry sym, ist=0, irqtrace=1
670 SAVE_ALL 721 SAVE_ALL
671 cld 722 cld
672 movl $1,%ebx 723 movl $1,%ebx
@@ -691,8 +742,73 @@ END(spurious_interrupt)
691 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 742 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
692 .endif 743 .endif
693 cli 744 cli
745 .if \irqtrace
746 TRACE_IRQS_OFF
747 .endif
694 .endm 748 .endm
695 749
750 /*
751 * "Paranoid" exit path from exception stack.
752 * Paranoid because this is used by NMIs and cannot take
753 * any kernel state for granted.
754 * We don't do kernel preemption checks here, because only
755 * NMI should be common and it does not enable IRQs and
756 * cannot get reschedule ticks.
757 *
758 * "trace" is 0 for the NMI handler only, because irq-tracing
759 * is fundamentally NMI-unsafe. (we cannot change the soft and
760 * hard flags at once, atomically)
761 */
762 .macro paranoidexit trace=1
763 /* ebx: no swapgs flag */
764paranoid_exit\trace:
765 testl %ebx,%ebx /* swapgs needed? */
766 jnz paranoid_restore\trace
767 testl $3,CS(%rsp)
768 jnz paranoid_userspace\trace
769paranoid_swapgs\trace:
770 TRACE_IRQS_IRETQ 0
771 swapgs
772paranoid_restore\trace:
773 RESTORE_ALL 8
774 iretq
775paranoid_userspace\trace:
776 GET_THREAD_INFO(%rcx)
777 movl threadinfo_flags(%rcx),%ebx
778 andl $_TIF_WORK_MASK,%ebx
779 jz paranoid_swapgs\trace
780 movq %rsp,%rdi /* &pt_regs */
781 call sync_regs
782 movq %rax,%rsp /* switch stack for scheduling */
783 testl $_TIF_NEED_RESCHED,%ebx
784 jnz paranoid_schedule\trace
785 movl %ebx,%edx /* arg3: thread flags */
786 .if \trace
787 TRACE_IRQS_ON
788 .endif
789 sti
790 xorl %esi,%esi /* arg2: oldset */
791 movq %rsp,%rdi /* arg1: &pt_regs */
792 call do_notify_resume
793 cli
794 .if \trace
795 TRACE_IRQS_OFF
796 .endif
797 jmp paranoid_userspace\trace
798paranoid_schedule\trace:
799 .if \trace
800 TRACE_IRQS_ON
801 .endif
802 sti
803 call schedule
804 cli
805 .if \trace
806 TRACE_IRQS_OFF
807 .endif
808 jmp paranoid_userspace\trace
809 CFI_ENDPROC
810 .endm
811
696/* 812/*
697 * Exception entry point. This expects an error code/orig_rax on the stack 813 * Exception entry point. This expects an error code/orig_rax on the stack
698 * and the exception handler in %rax. 814 * and the exception handler in %rax.
@@ -748,6 +864,7 @@ error_exit:
748 movl %ebx,%eax 864 movl %ebx,%eax
749 RESTORE_REST 865 RESTORE_REST
750 cli 866 cli
867 TRACE_IRQS_OFF
751 GET_THREAD_INFO(%rcx) 868 GET_THREAD_INFO(%rcx)
752 testl %eax,%eax 869 testl %eax,%eax
753 jne retint_kernel 870 jne retint_kernel
@@ -755,6 +872,10 @@ error_exit:
755 movl $_TIF_WORK_MASK,%edi 872 movl $_TIF_WORK_MASK,%edi
756 andl %edi,%edx 873 andl %edi,%edx
757 jnz retint_careful 874 jnz retint_careful
875 /*
876 * The iret might restore flags:
877 */
878 TRACE_IRQS_IRETQ
758 swapgs 879 swapgs
759 RESTORE_ARGS 0,8,0 880 RESTORE_ARGS 0,8,0
760 jmp iret_label 881 jmp iret_label
@@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug)
916 pushq $0 1037 pushq $0
917 CFI_ADJUST_CFA_OFFSET 8 1038 CFI_ADJUST_CFA_OFFSET 8
918 paranoidentry do_debug, DEBUG_STACK 1039 paranoidentry do_debug, DEBUG_STACK
919 jmp paranoid_exit 1040 paranoidexit
920 CFI_ENDPROC
921END(debug) 1041END(debug)
922 .previous .text 1042 .previous .text
923 1043
@@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi)
926 INTR_FRAME 1046 INTR_FRAME
927 pushq $-1 1047 pushq $-1
928 CFI_ADJUST_CFA_OFFSET 8 1048 CFI_ADJUST_CFA_OFFSET 8
929 paranoidentry do_nmi 1049 paranoidentry do_nmi, 0, 0
930 /* 1050#ifdef CONFIG_TRACE_IRQFLAGS
931 * "Paranoid" exit path from exception stack. 1051 paranoidexit 0
932 * Paranoid because this is used by NMIs and cannot take 1052#else
933 * any kernel state for granted. 1053 jmp paranoid_exit1
934 * We don't do kernel preemption checks here, because only 1054 CFI_ENDPROC
935 * NMI should be common and it does not enable IRQs and 1055#endif
936 * cannot get reschedule ticks.
937 */
938 /* ebx: no swapgs flag */
939paranoid_exit:
940 testl %ebx,%ebx /* swapgs needed? */
941 jnz paranoid_restore
942 testl $3,CS(%rsp)
943 jnz paranoid_userspace
944paranoid_swapgs:
945 swapgs
946paranoid_restore:
947 RESTORE_ALL 8
948 iretq
949paranoid_userspace:
950 GET_THREAD_INFO(%rcx)
951 movl threadinfo_flags(%rcx),%ebx
952 andl $_TIF_WORK_MASK,%ebx
953 jz paranoid_swapgs
954 movq %rsp,%rdi /* &pt_regs */
955 call sync_regs
956 movq %rax,%rsp /* switch stack for scheduling */
957 testl $_TIF_NEED_RESCHED,%ebx
958 jnz paranoid_schedule
959 movl %ebx,%edx /* arg3: thread flags */
960 sti
961 xorl %esi,%esi /* arg2: oldset */
962 movq %rsp,%rdi /* arg1: &pt_regs */
963 call do_notify_resume
964 cli
965 jmp paranoid_userspace
966paranoid_schedule:
967 sti
968 call schedule
969 cli
970 jmp paranoid_userspace
971 CFI_ENDPROC
972END(nmi) 1056END(nmi)
973 .previous .text 1057 .previous .text
974 1058
@@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3)
977 pushq $0 1061 pushq $0
978 CFI_ADJUST_CFA_OFFSET 8 1062 CFI_ADJUST_CFA_OFFSET 8
979 paranoidentry do_int3, DEBUG_STACK 1063 paranoidentry do_int3, DEBUG_STACK
980 jmp paranoid_exit 1064 jmp paranoid_exit1
981 CFI_ENDPROC 1065 CFI_ENDPROC
982END(int3) 1066END(int3)
983 .previous .text 1067 .previous .text
@@ -1006,7 +1090,7 @@ END(reserved)
1006ENTRY(double_fault) 1090ENTRY(double_fault)
1007 XCPT_FRAME 1091 XCPT_FRAME
1008 paranoidentry do_double_fault 1092 paranoidentry do_double_fault
1009 jmp paranoid_exit 1093 jmp paranoid_exit1
1010 CFI_ENDPROC 1094 CFI_ENDPROC
1011END(double_fault) 1095END(double_fault)
1012 1096
@@ -1022,7 +1106,7 @@ END(segment_not_present)
1022ENTRY(stack_segment) 1106ENTRY(stack_segment)
1023 XCPT_FRAME 1107 XCPT_FRAME
1024 paranoidentry do_stack_segment 1108 paranoidentry do_stack_segment
1025 jmp paranoid_exit 1109 jmp paranoid_exit1
1026 CFI_ENDPROC 1110 CFI_ENDPROC
1027END(stack_segment) 1111END(stack_segment)
1028 1112
@@ -1050,7 +1134,7 @@ ENTRY(machine_check)
1050 pushq $0 1134 pushq $0
1051 CFI_ADJUST_CFA_OFFSET 8 1135 CFI_ADJUST_CFA_OFFSET 8
1052 paranoidentry do_machine_check 1136 paranoidentry do_machine_check
1053 jmp paranoid_exit 1137 jmp paranoid_exit1
1054 CFI_ENDPROC 1138 CFI_ENDPROC
1055END(machine_check) 1139END(machine_check)
1056#endif 1140#endif
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index e6a71c9556d9..36647ce6aecb 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -85,6 +85,11 @@ void __init x86_64_start_kernel(char * real_mode_data)
85 clear_bss(); 85 clear_bss();
86 86
87 /* 87 /*
88 * This must be called really, really early:
89 */
90 lockdep_init();
91
92 /*
88 * switch to init_level4_pgt from boot_level4_pgt 93 * switch to init_level4_pgt from boot_level4_pgt
89 */ 94 */
90 memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); 95 memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index a1f1df5f7bfc..5221a53e90c1 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -177,8 +177,10 @@ asmlinkage void do_softirq(void)
177 local_irq_save(flags); 177 local_irq_save(flags);
178 pending = local_softirq_pending(); 178 pending = local_softirq_pending();
179 /* Switch to interrupt stack */ 179 /* Switch to interrupt stack */
180 if (pending) 180 if (pending) {
181 call_softirq(); 181 call_softirq();
182 WARN_ON_ONCE(softirq_count());
183 }
182 local_irq_restore(flags); 184 local_irq_restore(flags);
183} 185}
184EXPORT_SYMBOL(do_softirq); 186EXPORT_SYMBOL(do_softirq);
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 476c1472fc07..5baa0c726e97 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -127,7 +127,7 @@ void __cpuinit nmi_watchdog_default(void)
127static __init void nmi_cpu_busy(void *data) 127static __init void nmi_cpu_busy(void *data)
128{ 128{
129 volatile int *endflag = data; 129 volatile int *endflag = data;
130 local_irq_enable(); 130 local_irq_enable_in_hardirq();
131 /* Intentionally don't use cpu_relax here. This is 131 /* Intentionally don't use cpu_relax here. This is
132 to make sure that the performance counter really ticks, 132 to make sure that the performance counter really ticks,
133 even if there is a simulator or similar that catches the 133 even if there is a simulator or similar that catches the
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index ca56e19b8b6e..bb6745d13b8f 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -296,7 +296,7 @@ void __show_regs(struct pt_regs * regs)
296 system_utsname.version); 296 system_utsname.version);
297 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); 297 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
298 printk_address(regs->rip); 298 printk_address(regs->rip);
299 printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, 299 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
300 regs->eflags); 300 regs->eflags);
301 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", 301 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
302 regs->rax, regs->rbx, regs->rcx); 302 regs->rax, regs->rbx, regs->rcx);
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 9705a6a384f1..b7c705969791 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -775,6 +775,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
775 }; 775 };
776 DECLARE_WORK(work, do_fork_idle, &c_idle); 776 DECLARE_WORK(work, do_fork_idle, &c_idle);
777 777
778 lockdep_set_class(&c_idle.done.wait.lock, &waitqueue_lock_key);
779
778 /* allocate memory for gdts of secondary cpus. Hotplug is considered */ 780 /* allocate memory for gdts of secondary cpus. Hotplug is considered */
779 if (!cpu_gdt_descr[cpu].address && 781 if (!cpu_gdt_descr[cpu].address &&
780 !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { 782 !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c
new file mode 100644
index 000000000000..32cf55eb9af8
--- /dev/null
+++ b/arch/x86_64/kernel/stacktrace.c
@@ -0,0 +1,221 @@
1/*
2 * arch/x86_64/kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/sched.h>
9#include <linux/stacktrace.h>
10
11#include <asm/smp.h>
12
/*
 * Return non-zero when 'addr' lies inside the half-open range [start, end).
 *
 * 'end' is exclusive: callers pass the first address past a stack region
 * (e.g. stack_start + THREAD_SIZE), so an address equal to 'end' does not
 * belong to that stack.  This matches the exception-stack checks in this
 * file, which skip a stack when 'stack >= stack_end'.
 */
static inline int
in_range(unsigned long start, unsigned long addr, unsigned long end)
{
	return addr >= start && addr < end;
}
18
19static unsigned long
20get_stack_end(struct task_struct *task, unsigned long stack)
21{
22 unsigned long stack_start, stack_end, flags;
23 int i, cpu;
24
25 /*
26 * The most common case is that we are in the task stack:
27 */
28 stack_start = (unsigned long)task->thread_info;
29 stack_end = stack_start + THREAD_SIZE;
30
31 if (in_range(stack_start, stack, stack_end))
32 return stack_end;
33
34 /*
35 * We are in an interrupt if irqstackptr is set:
36 */
37 raw_local_irq_save(flags);
38 cpu = safe_smp_processor_id();
39 stack_end = (unsigned long)cpu_pda(cpu)->irqstackptr;
40
41 if (stack_end) {
42 stack_start = stack_end & ~(IRQSTACKSIZE-1);
43 if (in_range(stack_start, stack, stack_end))
44 goto out_restore;
45 /*
46 * We get here if we are in an IRQ context but we
47 * are also in an exception stack.
48 */
49 }
50
51 /*
52 * Iterate over all exception stacks, and figure out whether
53 * 'stack' is in one of them:
54 */
55 for (i = 0; i < N_EXCEPTION_STACKS; i++) {
56 /*
57 * set 'end' to the end of the exception stack.
58 */
59 stack_end = per_cpu(init_tss, cpu).ist[i];
60 stack_start = stack_end - EXCEPTION_STKSZ;
61
62 /*
63 * Is 'stack' above this exception frame's end?
64 * If yes then skip to the next frame.
65 */
66 if (stack >= stack_end)
67 continue;
68 /*
69 * Is 'stack' above this exception frame's start address?
70 * If yes then we found the right frame.
71 */
72 if (stack >= stack_start)
73 goto out_restore;
74
75 /*
76 * If this is a debug stack, and if it has a larger size than
77 * the usual exception stacks, then 'stack' might still
78 * be within the lower portion of the debug stack:
79 */
80#if DEBUG_STKSZ > EXCEPTION_STKSZ
81 if (i == DEBUG_STACK - 1 && stack >= stack_end - DEBUG_STKSZ) {
82 /*
83 * Black magic. A large debug stack is composed of
84 * multiple exception stack entries, which we
85 * iterate through now. Dont look:
86 */
87 do {
88 stack_end -= EXCEPTION_STKSZ;
89 stack_start -= EXCEPTION_STKSZ;
90 } while (stack < stack_start);
91
92 goto out_restore;
93 }
94#endif
95 }
96 /*
97 * Ok, 'stack' is not pointing to any of the system stacks.
98 */
99 stack_end = 0;
100
101out_restore:
102 raw_local_irq_restore(flags);
103
104 return stack_end;
105}
106
107
108/*
109 * Save stack-backtrace addresses into a stack_trace buffer:
110 */
111static inline unsigned long
112save_context_stack(struct stack_trace *trace, unsigned int skip,
113 unsigned long stack, unsigned long stack_end)
114{
115 unsigned long addr;
116
117#ifdef CONFIG_FRAME_POINTER
118 unsigned long prev_stack = 0;
119
120 while (in_range(prev_stack, stack, stack_end)) {
121 pr_debug("stack: %p\n", (void *)stack);
122 addr = (unsigned long)(((unsigned long *)stack)[1]);
123 pr_debug("addr: %p\n", (void *)addr);
124 if (!skip)
125 trace->entries[trace->nr_entries++] = addr-1;
126 else
127 skip--;
128 if (trace->nr_entries >= trace->max_entries)
129 break;
130 if (!addr)
131 return 0;
132 /*
133 * Stack frames must go forwards (otherwise a loop could
134 * happen if the stackframe is corrupted), so we move
135 * prev_stack forwards:
136 */
137 prev_stack = stack;
138 stack = (unsigned long)(((unsigned long *)stack)[0]);
139 }
140 pr_debug("invalid: %p\n", (void *)stack);
141#else
142 while (stack < stack_end) {
143 addr = ((unsigned long *)stack)[0];
144 stack += sizeof(long);
145 if (__kernel_text_address(addr)) {
146 if (!skip)
147 trace->entries[trace->nr_entries++] = addr-1;
148 else
149 skip--;
150 if (trace->nr_entries >= trace->max_entries)
151 break;
152 }
153 }
154#endif
155 return stack;
156}
157
158#define MAX_STACKS 10
159
160/*
161 * Save stack-backtrace addresses into a stack_trace buffer.
162 * If all_contexts is set, all contexts (hardirq, softirq and process)
163 * are saved. If not set then only the current context is saved.
164 */
165void save_stack_trace(struct stack_trace *trace,
166 struct task_struct *task, int all_contexts,
167 unsigned int skip)
168{
169 unsigned long stack = (unsigned long)&stack;
170 int i, nr_stacks = 0, stacks_done[MAX_STACKS];
171
172 WARN_ON(trace->nr_entries || !trace->max_entries);
173
174 if (!task)
175 task = current;
176
177 pr_debug("task: %p, ti: %p\n", task, task->thread_info);
178
179 if (!task || task == current) {
180 /* Grab rbp right from our regs: */
181 asm ("mov %%rbp, %0" : "=r" (stack));
182 pr_debug("rbp: %p\n", (void *)stack);
183 } else {
184 /* rbp is the last reg pushed by switch_to(): */
185 stack = task->thread.rsp;
186 pr_debug("other task rsp: %p\n", (void *)stack);
187 stack = (unsigned long)(((unsigned long *)stack)[0]);
188 pr_debug("other task rbp: %p\n", (void *)stack);
189 }
190
191 while (1) {
192 unsigned long stack_end = get_stack_end(task, stack);
193
194 pr_debug("stack: %p\n", (void *)stack);
195 pr_debug("stack end: %p\n", (void *)stack_end);
196
197 /*
198 * Invalid stack addres?
199 */
200 if (!stack_end)
201 return;
202 /*
203 * Were we in this stack already? (recursion)
204 */
205 for (i = 0; i < nr_stacks; i++)
206 if (stacks_done[i] == stack_end)
207 return;
208 stacks_done[nr_stacks] = stack_end;
209
210 stack = save_context_stack(trace, skip, stack, stack_end);
211 if (!all_contexts || !stack ||
212 trace->nr_entries >= trace->max_entries)
213 return;
214 trace->entries[trace->nr_entries++] = ULONG_MAX;
215 if (trace->nr_entries >= trace->max_entries)
216 return;
217 if (++nr_stacks >= MAX_STACKS)
218 return;
219 }
220}
221
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 5a5311d3de0f..79d05c482072 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -110,28 +110,31 @@ static int kstack_depth_to_print = 12;
110static int call_trace = 1; 110static int call_trace = 1;
111 111
112#ifdef CONFIG_KALLSYMS 112#ifdef CONFIG_KALLSYMS
113#include <linux/kallsyms.h> 113# include <linux/kallsyms.h>
114int printk_address(unsigned long address) 114void printk_address(unsigned long address)
115{ 115{
116 unsigned long offset = 0, symsize; 116 unsigned long offset = 0, symsize;
117 const char *symname; 117 const char *symname;
118 char *modname; 118 char *modname;
119 char *delim = ":"; 119 char *delim = ":";
120 char namebuf[128]; 120 char namebuf[128];
121 121
122 symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); 122 symname = kallsyms_lookup(address, &symsize, &offset,
123 if (!symname) 123 &modname, namebuf);
124 return printk("[<%016lx>]", address); 124 if (!symname) {
125 if (!modname) 125 printk(" [<%016lx>]\n", address);
126 return;
127 }
128 if (!modname)
126 modname = delim = ""; 129 modname = delim = "";
127 return printk("<%016lx>{%s%s%s%s%+ld}", 130 printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n",
128 address, delim, modname, delim, symname, offset); 131 address, delim, modname, delim, symname, offset, symsize);
129} 132}
130#else 133#else
131int printk_address(unsigned long address) 134void printk_address(unsigned long address)
132{ 135{
133 return printk("[<%016lx>]", address); 136 printk(" [<%016lx>]\n", address);
134} 137}
135#endif 138#endif
136 139
137static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 140static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -149,10 +152,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
149 }; 152 };
150 unsigned k; 153 unsigned k;
151 154
155 /*
156 * Iterate over all exception stacks, and figure out whether
157 * 'stack' is in one of them:
158 */
152 for (k = 0; k < N_EXCEPTION_STACKS; k++) { 159 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
153 unsigned long end; 160 unsigned long end;
154 161
162 /*
163 * set 'end' to the end of the exception stack.
164 */
155 switch (k + 1) { 165 switch (k + 1) {
166 /*
167 * TODO: this block is not needed i think, because
168 * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
169 * properly too.
170 */
156#if DEBUG_STKSZ > EXCEPTION_STKSZ 171#if DEBUG_STKSZ > EXCEPTION_STKSZ
157 case DEBUG_STACK: 172 case DEBUG_STACK:
158 end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; 173 end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
@@ -162,19 +177,43 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
162 end = per_cpu(init_tss, cpu).ist[k]; 177 end = per_cpu(init_tss, cpu).ist[k];
163 break; 178 break;
164 } 179 }
180 /*
181 * Is 'stack' above this exception frame's end?
182 * If yes then skip to the next frame.
183 */
165 if (stack >= end) 184 if (stack >= end)
166 continue; 185 continue;
186 /*
187 * Is 'stack' above this exception frame's start address?
188 * If yes then we found the right frame.
189 */
167 if (stack >= end - EXCEPTION_STKSZ) { 190 if (stack >= end - EXCEPTION_STKSZ) {
191 /*
192 * Make sure we only iterate through an exception
193 * stack once. If it comes up for the second time
194 * then there's something wrong going on - just
195 * break out and return NULL:
196 */
168 if (*usedp & (1U << k)) 197 if (*usedp & (1U << k))
169 break; 198 break;
170 *usedp |= 1U << k; 199 *usedp |= 1U << k;
171 *idp = ids[k]; 200 *idp = ids[k];
172 return (unsigned long *)end; 201 return (unsigned long *)end;
173 } 202 }
203 /*
204 * If this is a debug stack, and if it has a larger size than
205 * the usual exception stacks, then 'stack' might still
206 * be within the lower portion of the debug stack:
207 */
174#if DEBUG_STKSZ > EXCEPTION_STKSZ 208#if DEBUG_STKSZ > EXCEPTION_STKSZ
175 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { 209 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
176 unsigned j = N_EXCEPTION_STACKS - 1; 210 unsigned j = N_EXCEPTION_STACKS - 1;
177 211
212 /*
213 * Black magic. A large debug stack is composed of
214 * multiple exception stack entries, which we
215 * iterate through now. Dont look:
216 */
178 do { 217 do {
179 ++j; 218 ++j;
180 end -= EXCEPTION_STKSZ; 219 end -= EXCEPTION_STKSZ;
@@ -193,20 +232,14 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
193 232
194static int show_trace_unwind(struct unwind_frame_info *info, void *context) 233static int show_trace_unwind(struct unwind_frame_info *info, void *context)
195{ 234{
196 int i = 11, n = 0; 235 int n = 0;
197 236
198 while (unwind(info) == 0 && UNW_PC(info)) { 237 while (unwind(info) == 0 && UNW_PC(info)) {
199 ++n; 238 n++;
200 if (i > 50) { 239 printk_address(UNW_PC(info));
201 printk("\n ");
202 i = 7;
203 } else
204 i += printk(" ");
205 i += printk_address(UNW_PC(info));
206 if (arch_unw_user_mode(info)) 240 if (arch_unw_user_mode(info))
207 break; 241 break;
208 } 242 }
209 printk("\n");
210 return n; 243 return n;
211} 244}
212 245
@@ -224,7 +257,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
224 int i = 11; 257 int i = 11;
225 unsigned used = 0; 258 unsigned used = 0;
226 259
227 printk("\nCall Trace:"); 260 printk("\nCall Trace:\n");
228 261
229 if (!tsk) 262 if (!tsk)
230 tsk = current; 263 tsk = current;
@@ -250,16 +283,15 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
250 } 283 }
251 } 284 }
252 285
286 /*
287 * Print function call entries within a stack. 'cond' is the
288 * "end of stackframe" condition, that the 'stack++'
289 * iteration will eventually trigger.
290 */
253#define HANDLE_STACK(cond) \ 291#define HANDLE_STACK(cond) \
254 do while (cond) { \ 292 do while (cond) { \
255 unsigned long addr = *stack++; \ 293 unsigned long addr = *stack++; \
256 if (kernel_text_address(addr)) { \ 294 if (kernel_text_address(addr)) { \
257 if (i > 50) { \
258 printk("\n "); \
259 i = 0; \
260 } \
261 else \
262 i += printk(" "); \
263 /* \ 295 /* \
264 * If the address is either in the text segment of the \ 296 * If the address is either in the text segment of the \
265 * kernel, or in the region which contains vmalloc'ed \ 297 * kernel, or in the region which contains vmalloc'ed \
@@ -268,20 +300,30 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
268 * down the cause of the crash will be able to figure \ 300 * down the cause of the crash will be able to figure \
269 * out the call path that was taken. \ 301 * out the call path that was taken. \
270 */ \ 302 */ \
271 i += printk_address(addr); \ 303 printk_address(addr); \
272 } \ 304 } \
273 } while (0) 305 } while (0)
274 306
275 for(; ; ) { 307 /*
308 * Print function call entries in all stacks, starting at the
309 * current stack address. If the stacks consist of nested
310 * exceptions
311 */
312 for ( ; ; ) {
276 const char *id; 313 const char *id;
277 unsigned long *estack_end; 314 unsigned long *estack_end;
278 estack_end = in_exception_stack(cpu, (unsigned long)stack, 315 estack_end = in_exception_stack(cpu, (unsigned long)stack,
279 &used, &id); 316 &used, &id);
280 317
281 if (estack_end) { 318 if (estack_end) {
282 i += printk(" <%s>", id); 319 printk(" <%s>", id);
283 HANDLE_STACK (stack < estack_end); 320 HANDLE_STACK (stack < estack_end);
284 i += printk(" <EOE>"); 321 printk(" <EOE>");
322 /*
323 * We link to the next stack via the
324 * second-to-last pointer (index -2 to end) in the
325 * exception stack:
326 */
285 stack = (unsigned long *) estack_end[-2]; 327 stack = (unsigned long *) estack_end[-2];
286 continue; 328 continue;
287 } 329 }
@@ -291,19 +333,28 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
291 (IRQSTACKSIZE - 64) / sizeof(*irqstack); 333 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
292 334
293 if (stack >= irqstack && stack < irqstack_end) { 335 if (stack >= irqstack && stack < irqstack_end) {
294 i += printk(" <IRQ>"); 336 printk(" <IRQ>");
295 HANDLE_STACK (stack < irqstack_end); 337 HANDLE_STACK (stack < irqstack_end);
338 /*
339 * We link to the next stack (which would be
340 * the process stack normally) the last
341 * pointer (index -1 to end) in the IRQ stack:
342 */
296 stack = (unsigned long *) (irqstack_end[-1]); 343 stack = (unsigned long *) (irqstack_end[-1]);
297 irqstack_end = NULL; 344 irqstack_end = NULL;
298 i += printk(" <EOI>"); 345 printk(" <EOI>");
299 continue; 346 continue;
300 } 347 }
301 } 348 }
302 break; 349 break;
303 } 350 }
304 351
352 /*
353 * This prints the process stack:
354 */
305 HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); 355 HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
306#undef HANDLE_STACK 356#undef HANDLE_STACK
357
307 printk("\n"); 358 printk("\n");
308} 359}
309 360
@@ -337,8 +388,8 @@ static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned
337 break; 388 break;
338 } 389 }
339 if (i && ((i % 4) == 0)) 390 if (i && ((i % 4) == 0))
340 printk("\n "); 391 printk("\n");
341 printk("%016lx ", *stack++); 392 printk(" %016lx", *stack++);
342 touch_nmi_watchdog(); 393 touch_nmi_watchdog();
343 } 394 }
344 show_trace(tsk, regs, rsp); 395 show_trace(tsk, regs, rsp);