diff options
Diffstat (limited to 'arch/x86_64/kernel')
| -rw-r--r-- | arch/x86_64/kernel/Makefile | 1 | ||||
| -rw-r--r-- | arch/x86_64/kernel/entry.S | 188 | ||||
| -rw-r--r-- | arch/x86_64/kernel/head64.c | 5 | ||||
| -rw-r--r-- | arch/x86_64/kernel/irq.c | 4 | ||||
| -rw-r--r-- | arch/x86_64/kernel/nmi.c | 2 | ||||
| -rw-r--r-- | arch/x86_64/kernel/process.c | 2 | ||||
| -rw-r--r-- | arch/x86_64/kernel/smpboot.c | 2 | ||||
| -rw-r--r-- | arch/x86_64/kernel/stacktrace.c | 221 | ||||
| -rw-r--r-- | arch/x86_64/kernel/traps.c | 129 |
9 files changed, 460 insertions, 94 deletions
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 819e84ec5b64..b5aaeafc1cd3 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile | |||
| @@ -10,6 +10,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ | |||
| 10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ | 10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ |
| 11 | pci-dma.o pci-nommu.o alternative.o | 11 | pci-dma.o pci-nommu.o alternative.o |
| 12 | 12 | ||
| 13 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | ||
| 13 | obj-$(CONFIG_X86_MCE) += mce.o | 14 | obj-$(CONFIG_X86_MCE) += mce.o |
| 14 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o | 15 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
| 15 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o | 16 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o |
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index ed92c2983606..d464dded68c0 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S | |||
| @@ -41,13 +41,24 @@ | |||
| 41 | #include <asm/thread_info.h> | 41 | #include <asm/thread_info.h> |
| 42 | #include <asm/hw_irq.h> | 42 | #include <asm/hw_irq.h> |
| 43 | #include <asm/page.h> | 43 | #include <asm/page.h> |
| 44 | #include <asm/irqflags.h> | ||
| 44 | 45 | ||
| 45 | .code64 | 46 | .code64 |
| 46 | 47 | ||
| 47 | #ifndef CONFIG_PREEMPT | 48 | #ifndef CONFIG_PREEMPT |
| 48 | #define retint_kernel retint_restore_args | 49 | #define retint_kernel retint_restore_args |
| 49 | #endif | 50 | #endif |
| 50 | 51 | ||
| 52 | |||
| 53 | .macro TRACE_IRQS_IRETQ offset=ARGOFFSET | ||
| 54 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
| 55 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | ||
| 56 | jnc 1f | ||
| 57 | TRACE_IRQS_ON | ||
| 58 | 1: | ||
| 59 | #endif | ||
| 60 | .endm | ||
| 61 | |||
| 51 | /* | 62 | /* |
| 52 | * C code is not supposed to know about undefined top of stack. Every time | 63 | * C code is not supposed to know about undefined top of stack. Every time |
| 53 | * a C function with a pt_regs argument is called from the SYSCALL based | 64 | * a C function with a pt_regs argument is called from the SYSCALL based |
| @@ -194,6 +205,10 @@ ENTRY(system_call) | |||
| 194 | swapgs | 205 | swapgs |
| 195 | movq %rsp,%gs:pda_oldrsp | 206 | movq %rsp,%gs:pda_oldrsp |
| 196 | movq %gs:pda_kernelstack,%rsp | 207 | movq %gs:pda_kernelstack,%rsp |
| 208 | /* | ||
| 209 | * No need to follow this irqs off/on section - it's straight | ||
| 210 | * and short: | ||
| 211 | */ | ||
| 197 | sti | 212 | sti |
| 198 | SAVE_ARGS 8,1 | 213 | SAVE_ARGS 8,1 |
| 199 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 214 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
| @@ -219,10 +234,15 @@ ret_from_sys_call: | |||
| 219 | sysret_check: | 234 | sysret_check: |
| 220 | GET_THREAD_INFO(%rcx) | 235 | GET_THREAD_INFO(%rcx) |
| 221 | cli | 236 | cli |
| 237 | TRACE_IRQS_OFF | ||
| 222 | movl threadinfo_flags(%rcx),%edx | 238 | movl threadinfo_flags(%rcx),%edx |
| 223 | andl %edi,%edx | 239 | andl %edi,%edx |
| 224 | CFI_REMEMBER_STATE | 240 | CFI_REMEMBER_STATE |
| 225 | jnz sysret_careful | 241 | jnz sysret_careful |
| 242 | /* | ||
| 243 | * sysretq will re-enable interrupts: | ||
| 244 | */ | ||
| 245 | TRACE_IRQS_ON | ||
| 226 | movq RIP-ARGOFFSET(%rsp),%rcx | 246 | movq RIP-ARGOFFSET(%rsp),%rcx |
| 227 | CFI_REGISTER rip,rcx | 247 | CFI_REGISTER rip,rcx |
| 228 | RESTORE_ARGS 0,-ARG_SKIP,1 | 248 | RESTORE_ARGS 0,-ARG_SKIP,1 |
| @@ -237,6 +257,7 @@ sysret_careful: | |||
| 237 | CFI_RESTORE_STATE | 257 | CFI_RESTORE_STATE |
| 238 | bt $TIF_NEED_RESCHED,%edx | 258 | bt $TIF_NEED_RESCHED,%edx |
| 239 | jnc sysret_signal | 259 | jnc sysret_signal |
| 260 | TRACE_IRQS_ON | ||
| 240 | sti | 261 | sti |
| 241 | pushq %rdi | 262 | pushq %rdi |
| 242 | CFI_ADJUST_CFA_OFFSET 8 | 263 | CFI_ADJUST_CFA_OFFSET 8 |
| @@ -247,6 +268,7 @@ sysret_careful: | |||
| 247 | 268 | ||
| 248 | /* Handle a signal */ | 269 | /* Handle a signal */ |
| 249 | sysret_signal: | 270 | sysret_signal: |
| 271 | TRACE_IRQS_ON | ||
| 250 | sti | 272 | sti |
| 251 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | 273 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx |
| 252 | jz 1f | 274 | jz 1f |
| @@ -261,6 +283,7 @@ sysret_signal: | |||
| 261 | /* Use IRET because user could have changed frame. This | 283 | /* Use IRET because user could have changed frame. This |
| 262 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 284 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
| 263 | cli | 285 | cli |
| 286 | TRACE_IRQS_OFF | ||
| 264 | jmp int_with_check | 287 | jmp int_with_check |
| 265 | 288 | ||
| 266 | badsys: | 289 | badsys: |
| @@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call) | |||
| 309 | CFI_REL_OFFSET r10,R10-ARGOFFSET | 332 | CFI_REL_OFFSET r10,R10-ARGOFFSET |
| 310 | CFI_REL_OFFSET r11,R11-ARGOFFSET | 333 | CFI_REL_OFFSET r11,R11-ARGOFFSET |
| 311 | cli | 334 | cli |
| 335 | TRACE_IRQS_OFF | ||
| 312 | testl $3,CS-ARGOFFSET(%rsp) | 336 | testl $3,CS-ARGOFFSET(%rsp) |
| 313 | je retint_restore_args | 337 | je retint_restore_args |
| 314 | movl $_TIF_ALLWORK_MASK,%edi | 338 | movl $_TIF_ALLWORK_MASK,%edi |
| @@ -327,6 +351,7 @@ int_with_check: | |||
| 327 | int_careful: | 351 | int_careful: |
| 328 | bt $TIF_NEED_RESCHED,%edx | 352 | bt $TIF_NEED_RESCHED,%edx |
| 329 | jnc int_very_careful | 353 | jnc int_very_careful |
| 354 | TRACE_IRQS_ON | ||
| 330 | sti | 355 | sti |
| 331 | pushq %rdi | 356 | pushq %rdi |
| 332 | CFI_ADJUST_CFA_OFFSET 8 | 357 | CFI_ADJUST_CFA_OFFSET 8 |
| @@ -334,10 +359,12 @@ int_careful: | |||
| 334 | popq %rdi | 359 | popq %rdi |
| 335 | CFI_ADJUST_CFA_OFFSET -8 | 360 | CFI_ADJUST_CFA_OFFSET -8 |
| 336 | cli | 361 | cli |
| 362 | TRACE_IRQS_OFF | ||
| 337 | jmp int_with_check | 363 | jmp int_with_check |
| 338 | 364 | ||
| 339 | /* handle signals and tracing -- both require a full stack frame */ | 365 | /* handle signals and tracing -- both require a full stack frame */ |
| 340 | int_very_careful: | 366 | int_very_careful: |
| 367 | TRACE_IRQS_ON | ||
| 341 | sti | 368 | sti |
| 342 | SAVE_REST | 369 | SAVE_REST |
| 343 | /* Check for syscall exit trace */ | 370 | /* Check for syscall exit trace */ |
| @@ -351,6 +378,7 @@ int_very_careful: | |||
| 351 | CFI_ADJUST_CFA_OFFSET -8 | 378 | CFI_ADJUST_CFA_OFFSET -8 |
| 352 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi | 379 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi |
| 353 | cli | 380 | cli |
| 381 | TRACE_IRQS_OFF | ||
| 354 | jmp int_restore_rest | 382 | jmp int_restore_rest |
| 355 | 383 | ||
| 356 | int_signal: | 384 | int_signal: |
| @@ -363,6 +391,7 @@ int_signal: | |||
| 363 | int_restore_rest: | 391 | int_restore_rest: |
| 364 | RESTORE_REST | 392 | RESTORE_REST |
| 365 | cli | 393 | cli |
| 394 | TRACE_IRQS_OFF | ||
| 366 | jmp int_with_check | 395 | jmp int_with_check |
| 367 | CFI_ENDPROC | 396 | CFI_ENDPROC |
| 368 | END(int_ret_from_sys_call) | 397 | END(int_ret_from_sys_call) |
| @@ -484,6 +513,10 @@ END(stub_rt_sigreturn) | |||
| 484 | swapgs | 513 | swapgs |
| 485 | 1: incl %gs:pda_irqcount # RED-PEN should check preempt count | 514 | 1: incl %gs:pda_irqcount # RED-PEN should check preempt count |
| 486 | cmoveq %gs:pda_irqstackptr,%rsp | 515 | cmoveq %gs:pda_irqstackptr,%rsp |
| 516 | /* | ||
| 517 | * We entered an interrupt context - irqs are off: | ||
| 518 | */ | ||
| 519 | TRACE_IRQS_OFF | ||
| 487 | call \func | 520 | call \func |
| 488 | .endm | 521 | .endm |
| 489 | 522 | ||
| @@ -493,6 +526,7 @@ ENTRY(common_interrupt) | |||
| 493 | /* 0(%rsp): oldrsp-ARGOFFSET */ | 526 | /* 0(%rsp): oldrsp-ARGOFFSET */ |
| 494 | ret_from_intr: | 527 | ret_from_intr: |
| 495 | cli | 528 | cli |
| 529 | TRACE_IRQS_OFF | ||
| 496 | decl %gs:pda_irqcount | 530 | decl %gs:pda_irqcount |
| 497 | leaveq | 531 | leaveq |
| 498 | CFI_DEF_CFA_REGISTER rsp | 532 | CFI_DEF_CFA_REGISTER rsp |
| @@ -515,9 +549,21 @@ retint_check: | |||
| 515 | CFI_REMEMBER_STATE | 549 | CFI_REMEMBER_STATE |
| 516 | jnz retint_careful | 550 | jnz retint_careful |
| 517 | retint_swapgs: | 551 | retint_swapgs: |
| 552 | /* | ||
| 553 | * The iretq could re-enable interrupts: | ||
| 554 | */ | ||
| 555 | cli | ||
| 556 | TRACE_IRQS_IRETQ | ||
| 518 | swapgs | 557 | swapgs |
| 558 | jmp restore_args | ||
| 559 | |||
| 519 | retint_restore_args: | 560 | retint_restore_args: |
| 520 | cli | 561 | cli |
| 562 | /* | ||
| 563 | * The iretq could re-enable interrupts: | ||
| 564 | */ | ||
| 565 | TRACE_IRQS_IRETQ | ||
| 566 | restore_args: | ||
| 521 | RESTORE_ARGS 0,8,0 | 567 | RESTORE_ARGS 0,8,0 |
| 522 | iret_label: | 568 | iret_label: |
| 523 | iretq | 569 | iretq |
| @@ -530,6 +576,7 @@ iret_label: | |||
| 530 | /* running with kernel gs */ | 576 | /* running with kernel gs */ |
| 531 | bad_iret: | 577 | bad_iret: |
| 532 | movq $11,%rdi /* SIGSEGV */ | 578 | movq $11,%rdi /* SIGSEGV */ |
| 579 | TRACE_IRQS_ON | ||
| 533 | sti | 580 | sti |
| 534 | jmp do_exit | 581 | jmp do_exit |
| 535 | .previous | 582 | .previous |
| @@ -539,6 +586,7 @@ retint_careful: | |||
| 539 | CFI_RESTORE_STATE | 586 | CFI_RESTORE_STATE |
| 540 | bt $TIF_NEED_RESCHED,%edx | 587 | bt $TIF_NEED_RESCHED,%edx |
| 541 | jnc retint_signal | 588 | jnc retint_signal |
| 589 | TRACE_IRQS_ON | ||
| 542 | sti | 590 | sti |
| 543 | pushq %rdi | 591 | pushq %rdi |
| 544 | CFI_ADJUST_CFA_OFFSET 8 | 592 | CFI_ADJUST_CFA_OFFSET 8 |
| @@ -547,11 +595,13 @@ retint_careful: | |||
| 547 | CFI_ADJUST_CFA_OFFSET -8 | 595 | CFI_ADJUST_CFA_OFFSET -8 |
| 548 | GET_THREAD_INFO(%rcx) | 596 | GET_THREAD_INFO(%rcx) |
| 549 | cli | 597 | cli |
| 598 | TRACE_IRQS_OFF | ||
| 550 | jmp retint_check | 599 | jmp retint_check |
| 551 | 600 | ||
| 552 | retint_signal: | 601 | retint_signal: |
| 553 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | 602 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx |
| 554 | jz retint_swapgs | 603 | jz retint_swapgs |
| 604 | TRACE_IRQS_ON | ||
| 555 | sti | 605 | sti |
| 556 | SAVE_REST | 606 | SAVE_REST |
| 557 | movq $-1,ORIG_RAX(%rsp) | 607 | movq $-1,ORIG_RAX(%rsp) |
| @@ -560,6 +610,7 @@ retint_signal: | |||
| 560 | call do_notify_resume | 610 | call do_notify_resume |
| 561 | RESTORE_REST | 611 | RESTORE_REST |
| 562 | cli | 612 | cli |
| 613 | TRACE_IRQS_OFF | ||
| 563 | movl $_TIF_NEED_RESCHED,%edi | 614 | movl $_TIF_NEED_RESCHED,%edi |
| 564 | GET_THREAD_INFO(%rcx) | 615 | GET_THREAD_INFO(%rcx) |
| 565 | jmp retint_check | 616 | jmp retint_check |
| @@ -666,7 +717,7 @@ END(spurious_interrupt) | |||
| 666 | 717 | ||
| 667 | /* error code is on the stack already */ | 718 | /* error code is on the stack already */ |
| 668 | /* handle NMI like exceptions that can happen everywhere */ | 719 | /* handle NMI like exceptions that can happen everywhere */ |
| 669 | .macro paranoidentry sym, ist=0 | 720 | .macro paranoidentry sym, ist=0, irqtrace=1 |
| 670 | SAVE_ALL | 721 | SAVE_ALL |
| 671 | cld | 722 | cld |
| 672 | movl $1,%ebx | 723 | movl $1,%ebx |
| @@ -691,8 +742,73 @@ END(spurious_interrupt) | |||
| 691 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | 742 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) |
| 692 | .endif | 743 | .endif |
| 693 | cli | 744 | cli |
| 745 | .if \irqtrace | ||
| 746 | TRACE_IRQS_OFF | ||
| 747 | .endif | ||
| 694 | .endm | 748 | .endm |
| 695 | 749 | ||
| 750 | /* | ||
| 751 | * "Paranoid" exit path from exception stack. | ||
| 752 | * Paranoid because this is used by NMIs and cannot take | ||
| 753 | * any kernel state for granted. | ||
| 754 | * We don't do kernel preemption checks here, because only | ||
| 755 | * NMI should be common and it does not enable IRQs and | ||
| 756 | * cannot get reschedule ticks. | ||
| 757 | * | ||
| 758 | * "trace" is 0 for the NMI handler only, because irq-tracing | ||
| 759 | * is fundamentally NMI-unsafe. (we cannot change the soft and | ||
| 760 | * hard flags at once, atomically) | ||
| 761 | */ | ||
| 762 | .macro paranoidexit trace=1 | ||
| 763 | /* ebx: no swapgs flag */ | ||
| 764 | paranoid_exit\trace: | ||
| 765 | testl %ebx,%ebx /* swapgs needed? */ | ||
| 766 | jnz paranoid_restore\trace | ||
| 767 | testl $3,CS(%rsp) | ||
| 768 | jnz paranoid_userspace\trace | ||
| 769 | paranoid_swapgs\trace: | ||
| 770 | TRACE_IRQS_IRETQ 0 | ||
| 771 | swapgs | ||
| 772 | paranoid_restore\trace: | ||
| 773 | RESTORE_ALL 8 | ||
| 774 | iretq | ||
| 775 | paranoid_userspace\trace: | ||
| 776 | GET_THREAD_INFO(%rcx) | ||
| 777 | movl threadinfo_flags(%rcx),%ebx | ||
| 778 | andl $_TIF_WORK_MASK,%ebx | ||
| 779 | jz paranoid_swapgs\trace | ||
| 780 | movq %rsp,%rdi /* &pt_regs */ | ||
| 781 | call sync_regs | ||
| 782 | movq %rax,%rsp /* switch stack for scheduling */ | ||
| 783 | testl $_TIF_NEED_RESCHED,%ebx | ||
| 784 | jnz paranoid_schedule\trace | ||
| 785 | movl %ebx,%edx /* arg3: thread flags */ | ||
| 786 | .if \trace | ||
| 787 | TRACE_IRQS_ON | ||
| 788 | .endif | ||
| 789 | sti | ||
| 790 | xorl %esi,%esi /* arg2: oldset */ | ||
| 791 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
| 792 | call do_notify_resume | ||
| 793 | cli | ||
| 794 | .if \trace | ||
| 795 | TRACE_IRQS_OFF | ||
| 796 | .endif | ||
| 797 | jmp paranoid_userspace\trace | ||
| 798 | paranoid_schedule\trace: | ||
| 799 | .if \trace | ||
| 800 | TRACE_IRQS_ON | ||
| 801 | .endif | ||
| 802 | sti | ||
| 803 | call schedule | ||
| 804 | cli | ||
| 805 | .if \trace | ||
| 806 | TRACE_IRQS_OFF | ||
| 807 | .endif | ||
| 808 | jmp paranoid_userspace\trace | ||
| 809 | CFI_ENDPROC | ||
| 810 | .endm | ||
| 811 | |||
| 696 | /* | 812 | /* |
| 697 | * Exception entry point. This expects an error code/orig_rax on the stack | 813 | * Exception entry point. This expects an error code/orig_rax on the stack |
| 698 | * and the exception handler in %rax. | 814 | * and the exception handler in %rax. |
| @@ -748,6 +864,7 @@ error_exit: | |||
| 748 | movl %ebx,%eax | 864 | movl %ebx,%eax |
| 749 | RESTORE_REST | 865 | RESTORE_REST |
| 750 | cli | 866 | cli |
| 867 | TRACE_IRQS_OFF | ||
| 751 | GET_THREAD_INFO(%rcx) | 868 | GET_THREAD_INFO(%rcx) |
| 752 | testl %eax,%eax | 869 | testl %eax,%eax |
| 753 | jne retint_kernel | 870 | jne retint_kernel |
| @@ -755,6 +872,10 @@ error_exit: | |||
| 755 | movl $_TIF_WORK_MASK,%edi | 872 | movl $_TIF_WORK_MASK,%edi |
| 756 | andl %edi,%edx | 873 | andl %edi,%edx |
| 757 | jnz retint_careful | 874 | jnz retint_careful |
| 875 | /* | ||
| 876 | * The iret might restore flags: | ||
| 877 | */ | ||
| 878 | TRACE_IRQS_IRETQ | ||
| 758 | swapgs | 879 | swapgs |
| 759 | RESTORE_ARGS 0,8,0 | 880 | RESTORE_ARGS 0,8,0 |
| 760 | jmp iret_label | 881 | jmp iret_label |
| @@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug) | |||
| 916 | pushq $0 | 1037 | pushq $0 |
| 917 | CFI_ADJUST_CFA_OFFSET 8 | 1038 | CFI_ADJUST_CFA_OFFSET 8 |
| 918 | paranoidentry do_debug, DEBUG_STACK | 1039 | paranoidentry do_debug, DEBUG_STACK |
| 919 | jmp paranoid_exit | 1040 | paranoidexit |
| 920 | CFI_ENDPROC | ||
| 921 | END(debug) | 1041 | END(debug) |
| 922 | .previous .text | 1042 | .previous .text |
| 923 | 1043 | ||
| @@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi) | |||
| 926 | INTR_FRAME | 1046 | INTR_FRAME |
| 927 | pushq $-1 | 1047 | pushq $-1 |
| 928 | CFI_ADJUST_CFA_OFFSET 8 | 1048 | CFI_ADJUST_CFA_OFFSET 8 |
| 929 | paranoidentry do_nmi | 1049 | paranoidentry do_nmi, 0, 0 |
| 930 | /* | 1050 | #ifdef CONFIG_TRACE_IRQFLAGS |
| 931 | * "Paranoid" exit path from exception stack. | 1051 | paranoidexit 0 |
| 932 | * Paranoid because this is used by NMIs and cannot take | 1052 | #else |
| 933 | * any kernel state for granted. | 1053 | jmp paranoid_exit1 |
| 934 | * We don't do kernel preemption checks here, because only | 1054 | CFI_ENDPROC |
| 935 | * NMI should be common and it does not enable IRQs and | 1055 | #endif |
| 936 | * cannot get reschedule ticks. | ||
| 937 | */ | ||
| 938 | /* ebx: no swapgs flag */ | ||
| 939 | paranoid_exit: | ||
| 940 | testl %ebx,%ebx /* swapgs needed? */ | ||
| 941 | jnz paranoid_restore | ||
| 942 | testl $3,CS(%rsp) | ||
| 943 | jnz paranoid_userspace | ||
| 944 | paranoid_swapgs: | ||
| 945 | swapgs | ||
| 946 | paranoid_restore: | ||
| 947 | RESTORE_ALL 8 | ||
| 948 | iretq | ||
| 949 | paranoid_userspace: | ||
| 950 | GET_THREAD_INFO(%rcx) | ||
| 951 | movl threadinfo_flags(%rcx),%ebx | ||
| 952 | andl $_TIF_WORK_MASK,%ebx | ||
| 953 | jz paranoid_swapgs | ||
| 954 | movq %rsp,%rdi /* &pt_regs */ | ||
| 955 | call sync_regs | ||
| 956 | movq %rax,%rsp /* switch stack for scheduling */ | ||
| 957 | testl $_TIF_NEED_RESCHED,%ebx | ||
| 958 | jnz paranoid_schedule | ||
| 959 | movl %ebx,%edx /* arg3: thread flags */ | ||
| 960 | sti | ||
| 961 | xorl %esi,%esi /* arg2: oldset */ | ||
| 962 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
| 963 | call do_notify_resume | ||
| 964 | cli | ||
| 965 | jmp paranoid_userspace | ||
| 966 | paranoid_schedule: | ||
| 967 | sti | ||
| 968 | call schedule | ||
| 969 | cli | ||
| 970 | jmp paranoid_userspace | ||
| 971 | CFI_ENDPROC | ||
| 972 | END(nmi) | 1056 | END(nmi) |
| 973 | .previous .text | 1057 | .previous .text |
| 974 | 1058 | ||
| @@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3) | |||
| 977 | pushq $0 | 1061 | pushq $0 |
| 978 | CFI_ADJUST_CFA_OFFSET 8 | 1062 | CFI_ADJUST_CFA_OFFSET 8 |
| 979 | paranoidentry do_int3, DEBUG_STACK | 1063 | paranoidentry do_int3, DEBUG_STACK |
| 980 | jmp paranoid_exit | 1064 | jmp paranoid_exit1 |
| 981 | CFI_ENDPROC | 1065 | CFI_ENDPROC |
| 982 | END(int3) | 1066 | END(int3) |
| 983 | .previous .text | 1067 | .previous .text |
| @@ -1006,7 +1090,7 @@ END(reserved) | |||
| 1006 | ENTRY(double_fault) | 1090 | ENTRY(double_fault) |
| 1007 | XCPT_FRAME | 1091 | XCPT_FRAME |
| 1008 | paranoidentry do_double_fault | 1092 | paranoidentry do_double_fault |
| 1009 | jmp paranoid_exit | 1093 | jmp paranoid_exit1 |
| 1010 | CFI_ENDPROC | 1094 | CFI_ENDPROC |
| 1011 | END(double_fault) | 1095 | END(double_fault) |
| 1012 | 1096 | ||
| @@ -1022,7 +1106,7 @@ END(segment_not_present) | |||
| 1022 | ENTRY(stack_segment) | 1106 | ENTRY(stack_segment) |
| 1023 | XCPT_FRAME | 1107 | XCPT_FRAME |
| 1024 | paranoidentry do_stack_segment | 1108 | paranoidentry do_stack_segment |
| 1025 | jmp paranoid_exit | 1109 | jmp paranoid_exit1 |
| 1026 | CFI_ENDPROC | 1110 | CFI_ENDPROC |
| 1027 | END(stack_segment) | 1111 | END(stack_segment) |
| 1028 | 1112 | ||
| @@ -1050,7 +1134,7 @@ ENTRY(machine_check) | |||
| 1050 | pushq $0 | 1134 | pushq $0 |
| 1051 | CFI_ADJUST_CFA_OFFSET 8 | 1135 | CFI_ADJUST_CFA_OFFSET 8 |
| 1052 | paranoidentry do_machine_check | 1136 | paranoidentry do_machine_check |
| 1053 | jmp paranoid_exit | 1137 | jmp paranoid_exit1 |
| 1054 | CFI_ENDPROC | 1138 | CFI_ENDPROC |
| 1055 | END(machine_check) | 1139 | END(machine_check) |
| 1056 | #endif | 1140 | #endif |
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index e6a71c9556d9..36647ce6aecb 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c | |||
| @@ -85,6 +85,11 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
| 85 | clear_bss(); | 85 | clear_bss(); |
| 86 | 86 | ||
| 87 | /* | 87 | /* |
| 88 | * This must be called really, really early: | ||
| 89 | */ | ||
| 90 | lockdep_init(); | ||
| 91 | |||
| 92 | /* | ||
| 88 | * switch to init_level4_pgt from boot_level4_pgt | 93 | * switch to init_level4_pgt from boot_level4_pgt |
| 89 | */ | 94 | */ |
| 90 | memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); | 95 | memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); |
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index a1f1df5f7bfc..5221a53e90c1 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c | |||
| @@ -177,8 +177,10 @@ asmlinkage void do_softirq(void) | |||
| 177 | local_irq_save(flags); | 177 | local_irq_save(flags); |
| 178 | pending = local_softirq_pending(); | 178 | pending = local_softirq_pending(); |
| 179 | /* Switch to interrupt stack */ | 179 | /* Switch to interrupt stack */ |
| 180 | if (pending) | 180 | if (pending) { |
| 181 | call_softirq(); | 181 | call_softirq(); |
| 182 | WARN_ON_ONCE(softirq_count()); | ||
| 183 | } | ||
| 182 | local_irq_restore(flags); | 184 | local_irq_restore(flags); |
| 183 | } | 185 | } |
| 184 | EXPORT_SYMBOL(do_softirq); | 186 | EXPORT_SYMBOL(do_softirq); |
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 476c1472fc07..5baa0c726e97 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
| @@ -127,7 +127,7 @@ void __cpuinit nmi_watchdog_default(void) | |||
| 127 | static __init void nmi_cpu_busy(void *data) | 127 | static __init void nmi_cpu_busy(void *data) |
| 128 | { | 128 | { |
| 129 | volatile int *endflag = data; | 129 | volatile int *endflag = data; |
| 130 | local_irq_enable(); | 130 | local_irq_enable_in_hardirq(); |
| 131 | /* Intentionally don't use cpu_relax here. This is | 131 | /* Intentionally don't use cpu_relax here. This is |
| 132 | to make sure that the performance counter really ticks, | 132 | to make sure that the performance counter really ticks, |
| 133 | even if there is a simulator or similar that catches the | 133 | even if there is a simulator or similar that catches the |
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index ca56e19b8b6e..bb6745d13b8f 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c | |||
| @@ -296,7 +296,7 @@ void __show_regs(struct pt_regs * regs) | |||
| 296 | system_utsname.version); | 296 | system_utsname.version); |
| 297 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); | 297 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); |
| 298 | printk_address(regs->rip); | 298 | printk_address(regs->rip); |
| 299 | printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, | 299 | printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, |
| 300 | regs->eflags); | 300 | regs->eflags); |
| 301 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", | 301 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", |
| 302 | regs->rax, regs->rbx, regs->rcx); | 302 | regs->rax, regs->rbx, regs->rcx); |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 9705a6a384f1..b7c705969791 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
| @@ -775,6 +775,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) | |||
| 775 | }; | 775 | }; |
| 776 | DECLARE_WORK(work, do_fork_idle, &c_idle); | 776 | DECLARE_WORK(work, do_fork_idle, &c_idle); |
| 777 | 777 | ||
| 778 | lockdep_set_class(&c_idle.done.wait.lock, &waitqueue_lock_key); | ||
| 779 | |||
| 778 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ | 780 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ |
| 779 | if (!cpu_gdt_descr[cpu].address && | 781 | if (!cpu_gdt_descr[cpu].address && |
| 780 | !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { | 782 | !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { |
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c new file mode 100644 index 000000000000..32cf55eb9af8 --- /dev/null +++ b/arch/x86_64/kernel/stacktrace.c | |||
| @@ -0,0 +1,221 @@ | |||
| 1 | /* | ||
| 2 | * arch/x86_64/kernel/stacktrace.c | ||
| 3 | * | ||
| 4 | * Stack trace management functions | ||
| 5 | * | ||
| 6 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
| 7 | */ | ||
| 8 | #include <linux/sched.h> | ||
| 9 | #include <linux/stacktrace.h> | ||
| 10 | |||
| 11 | #include <asm/smp.h> | ||
| 12 | |||
| 13 | static inline int | ||
| 14 | in_range(unsigned long start, unsigned long addr, unsigned long end) | ||
| 15 | { | ||
| 16 | return addr >= start && addr <= end; | ||
| 17 | } | ||
| 18 | |||
| 19 | static unsigned long | ||
| 20 | get_stack_end(struct task_struct *task, unsigned long stack) | ||
| 21 | { | ||
| 22 | unsigned long stack_start, stack_end, flags; | ||
| 23 | int i, cpu; | ||
| 24 | |||
| 25 | /* | ||
| 26 | * The most common case is that we are in the task stack: | ||
| 27 | */ | ||
| 28 | stack_start = (unsigned long)task->thread_info; | ||
| 29 | stack_end = stack_start + THREAD_SIZE; | ||
| 30 | |||
| 31 | if (in_range(stack_start, stack, stack_end)) | ||
| 32 | return stack_end; | ||
| 33 | |||
| 34 | /* | ||
| 35 | * We are in an interrupt if irqstackptr is set: | ||
| 36 | */ | ||
| 37 | raw_local_irq_save(flags); | ||
| 38 | cpu = safe_smp_processor_id(); | ||
| 39 | stack_end = (unsigned long)cpu_pda(cpu)->irqstackptr; | ||
| 40 | |||
| 41 | if (stack_end) { | ||
| 42 | stack_start = stack_end & ~(IRQSTACKSIZE-1); | ||
| 43 | if (in_range(stack_start, stack, stack_end)) | ||
| 44 | goto out_restore; | ||
| 45 | /* | ||
| 46 | * We get here if we are in an IRQ context but we | ||
| 47 | * are also in an exception stack. | ||
| 48 | */ | ||
| 49 | } | ||
| 50 | |||
| 51 | /* | ||
| 52 | * Iterate over all exception stacks, and figure out whether | ||
| 53 | * 'stack' is in one of them: | ||
| 54 | */ | ||
| 55 | for (i = 0; i < N_EXCEPTION_STACKS; i++) { | ||
| 56 | /* | ||
| 57 | * set 'end' to the end of the exception stack. | ||
| 58 | */ | ||
| 59 | stack_end = per_cpu(init_tss, cpu).ist[i]; | ||
| 60 | stack_start = stack_end - EXCEPTION_STKSZ; | ||
| 61 | |||
| 62 | /* | ||
| 63 | * Is 'stack' above this exception frame's end? | ||
| 64 | * If yes then skip to the next frame. | ||
| 65 | */ | ||
| 66 | if (stack >= stack_end) | ||
| 67 | continue; | ||
| 68 | /* | ||
| 69 | * Is 'stack' above this exception frame's start address? | ||
| 70 | * If yes then we found the right frame. | ||
| 71 | */ | ||
| 72 | if (stack >= stack_start) | ||
| 73 | goto out_restore; | ||
| 74 | |||
| 75 | /* | ||
| 76 | * If this is a debug stack, and if it has a larger size than | ||
| 77 | * the usual exception stacks, then 'stack' might still | ||
| 78 | * be within the lower portion of the debug stack: | ||
| 79 | */ | ||
| 80 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
| 81 | if (i == DEBUG_STACK - 1 && stack >= stack_end - DEBUG_STKSZ) { | ||
| 82 | /* | ||
| 83 | * Black magic. A large debug stack is composed of | ||
| 84 | * multiple exception stack entries, which we | ||
| 85 | * iterate through now. Don't look: | ||
| 86 | */ | ||
| 87 | do { | ||
| 88 | stack_end -= EXCEPTION_STKSZ; | ||
| 89 | stack_start -= EXCEPTION_STKSZ; | ||
| 90 | } while (stack < stack_start); | ||
| 91 | |||
| 92 | goto out_restore; | ||
| 93 | } | ||
| 94 | #endif | ||
| 95 | } | ||
| 96 | /* | ||
| 97 | * Ok, 'stack' is not pointing to any of the system stacks. | ||
| 98 | */ | ||
| 99 | stack_end = 0; | ||
| 100 | |||
| 101 | out_restore: | ||
| 102 | raw_local_irq_restore(flags); | ||
| 103 | |||
| 104 | return stack_end; | ||
| 105 | } | ||
| 106 | |||
| 107 | |||
| 108 | /* | ||
| 109 | * Save stack-backtrace addresses into a stack_trace buffer: | ||
| 110 | */ | ||
| 111 | static inline unsigned long | ||
| 112 | save_context_stack(struct stack_trace *trace, unsigned int skip, | ||
| 113 | unsigned long stack, unsigned long stack_end) | ||
| 114 | { | ||
| 115 | unsigned long addr; | ||
| 116 | |||
| 117 | #ifdef CONFIG_FRAME_POINTER | ||
| 118 | unsigned long prev_stack = 0; | ||
| 119 | |||
| 120 | while (in_range(prev_stack, stack, stack_end)) { | ||
| 121 | pr_debug("stack: %p\n", (void *)stack); | ||
| 122 | addr = (unsigned long)(((unsigned long *)stack)[1]); | ||
| 123 | pr_debug("addr: %p\n", (void *)addr); | ||
| 124 | if (!skip) | ||
| 125 | trace->entries[trace->nr_entries++] = addr-1; | ||
| 126 | else | ||
| 127 | skip--; | ||
| 128 | if (trace->nr_entries >= trace->max_entries) | ||
| 129 | break; | ||
| 130 | if (!addr) | ||
| 131 | return 0; | ||
| 132 | /* | ||
| 133 | * Stack frames must go forwards (otherwise a loop could | ||
| 134 | * happen if the stackframe is corrupted), so we move | ||
| 135 | * prev_stack forwards: | ||
| 136 | */ | ||
| 137 | prev_stack = stack; | ||
| 138 | stack = (unsigned long)(((unsigned long *)stack)[0]); | ||
| 139 | } | ||
| 140 | pr_debug("invalid: %p\n", (void *)stack); | ||
| 141 | #else | ||
| 142 | while (stack < stack_end) { | ||
| 143 | addr = ((unsigned long *)stack)[0]; | ||
| 144 | stack += sizeof(long); | ||
| 145 | if (__kernel_text_address(addr)) { | ||
| 146 | if (!skip) | ||
| 147 | trace->entries[trace->nr_entries++] = addr-1; | ||
| 148 | else | ||
| 149 | skip--; | ||
| 150 | if (trace->nr_entries >= trace->max_entries) | ||
| 151 | break; | ||
| 152 | } | ||
| 153 | } | ||
| 154 | #endif | ||
| 155 | return stack; | ||
| 156 | } | ||
| 157 | |||
| 158 | #define MAX_STACKS 10 | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Save stack-backtrace addresses into a stack_trace buffer. | ||
| 162 | * If all_contexts is set, all contexts (hardirq, softirq and process) | ||
| 163 | * are saved. If not set then only the current context is saved. | ||
| 164 | */ | ||
| 165 | void save_stack_trace(struct stack_trace *trace, | ||
| 166 | struct task_struct *task, int all_contexts, | ||
| 167 | unsigned int skip) | ||
| 168 | { | ||
| 169 | unsigned long stack = (unsigned long)&stack; | ||
| 170 | int i, nr_stacks = 0, stacks_done[MAX_STACKS]; | ||
| 171 | |||
| 172 | WARN_ON(trace->nr_entries || !trace->max_entries); | ||
| 173 | |||
| 174 | if (!task) | ||
| 175 | task = current; | ||
| 176 | |||
| 177 | pr_debug("task: %p, ti: %p\n", task, task->thread_info); | ||
| 178 | |||
| 179 | if (!task || task == current) { | ||
| 180 | /* Grab rbp right from our regs: */ | ||
| 181 | asm ("mov %%rbp, %0" : "=r" (stack)); | ||
| 182 | pr_debug("rbp: %p\n", (void *)stack); | ||
| 183 | } else { | ||
| 184 | /* rbp is the last reg pushed by switch_to(): */ | ||
| 185 | stack = task->thread.rsp; | ||
| 186 | pr_debug("other task rsp: %p\n", (void *)stack); | ||
| 187 | stack = (unsigned long)(((unsigned long *)stack)[0]); | ||
| 188 | pr_debug("other task rbp: %p\n", (void *)stack); | ||
| 189 | } | ||
| 190 | |||
| 191 | while (1) { | ||
| 192 | unsigned long stack_end = get_stack_end(task, stack); | ||
| 193 | |||
| 194 | pr_debug("stack: %p\n", (void *)stack); | ||
| 195 | pr_debug("stack end: %p\n", (void *)stack_end); | ||
| 196 | |||
| 197 | /* | ||
| 198 | * Invalid stack address? | ||
| 199 | */ | ||
| 200 | if (!stack_end) | ||
| 201 | return; | ||
| 202 | /* | ||
| 203 | * Were we in this stack already? (recursion) | ||
| 204 | */ | ||
| 205 | for (i = 0; i < nr_stacks; i++) | ||
| 206 | if (stacks_done[i] == stack_end) | ||
| 207 | return; | ||
| 208 | stacks_done[nr_stacks] = stack_end; | ||
| 209 | |||
| 210 | stack = save_context_stack(trace, skip, stack, stack_end); | ||
| 211 | if (!all_contexts || !stack || | ||
| 212 | trace->nr_entries >= trace->max_entries) | ||
| 213 | return; | ||
| 214 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
| 215 | if (trace->nr_entries >= trace->max_entries) | ||
| 216 | return; | ||
| 217 | if (++nr_stacks >= MAX_STACKS) | ||
| 218 | return; | ||
| 219 | } | ||
| 220 | } | ||
| 221 | |||
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 5a5311d3de0f..79d05c482072 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c | |||
| @@ -110,28 +110,31 @@ static int kstack_depth_to_print = 12; | |||
| 110 | static int call_trace = 1; | 110 | static int call_trace = 1; |
| 111 | 111 | ||
| 112 | #ifdef CONFIG_KALLSYMS | 112 | #ifdef CONFIG_KALLSYMS |
| 113 | #include <linux/kallsyms.h> | 113 | # include <linux/kallsyms.h> |
| 114 | int printk_address(unsigned long address) | 114 | void printk_address(unsigned long address) |
| 115 | { | 115 | { |
| 116 | unsigned long offset = 0, symsize; | 116 | unsigned long offset = 0, symsize; |
| 117 | const char *symname; | 117 | const char *symname; |
| 118 | char *modname; | 118 | char *modname; |
| 119 | char *delim = ":"; | 119 | char *delim = ":"; |
| 120 | char namebuf[128]; | 120 | char namebuf[128]; |
| 121 | 121 | ||
| 122 | symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); | 122 | symname = kallsyms_lookup(address, &symsize, &offset, |
| 123 | if (!symname) | 123 | &modname, namebuf); |
| 124 | return printk("[<%016lx>]", address); | 124 | if (!symname) { |
| 125 | if (!modname) | 125 | printk(" [<%016lx>]\n", address); |
| 126 | return; | ||
| 127 | } | ||
| 128 | if (!modname) | ||
| 126 | modname = delim = ""; | 129 | modname = delim = ""; |
| 127 | return printk("<%016lx>{%s%s%s%s%+ld}", | 130 | printk(" [<%016lx>] %s%s%s%s+0x%lx/0x%lx\n", |
| 128 | address, delim, modname, delim, symname, offset); | 131 | address, delim, modname, delim, symname, offset, symsize); |
| 129 | } | 132 | } |
| 130 | #else | 133 | #else |
| 131 | int printk_address(unsigned long address) | 134 | void printk_address(unsigned long address) |
| 132 | { | 135 | { |
| 133 | return printk("[<%016lx>]", address); | 136 | printk(" [<%016lx>]\n", address); |
| 134 | } | 137 | } |
| 135 | #endif | 138 | #endif |
| 136 | 139 | ||
| 137 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 140 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
| @@ -149,10 +152,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
| 149 | }; | 152 | }; |
| 150 | unsigned k; | 153 | unsigned k; |
| 151 | 154 | ||
| 155 | /* | ||
| 156 | * Iterate over all exception stacks, and figure out whether | ||
| 157 | * 'stack' is in one of them: | ||
| 158 | */ | ||
| 152 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { | 159 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { |
| 153 | unsigned long end; | 160 | unsigned long end; |
| 154 | 161 | ||
| 162 | /* | ||
| 163 | * set 'end' to the end of the exception stack. | ||
| 164 | */ | ||
| 155 | switch (k + 1) { | 165 | switch (k + 1) { |
| 166 | /* | ||
| 167 | * TODO: this block is not needed I think, because | ||
| 168 | * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK] | ||
| 169 | * properly too. | ||
| 170 | */ | ||
| 156 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 171 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
| 157 | case DEBUG_STACK: | 172 | case DEBUG_STACK: |
| 158 | end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; | 173 | end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; |
| @@ -162,19 +177,43 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
| 162 | end = per_cpu(init_tss, cpu).ist[k]; | 177 | end = per_cpu(init_tss, cpu).ist[k]; |
| 163 | break; | 178 | break; |
| 164 | } | 179 | } |
| 180 | /* | ||
| 181 | * Is 'stack' above this exception frame's end? | ||
| 182 | * If yes then skip to the next frame. | ||
| 183 | */ | ||
| 165 | if (stack >= end) | 184 | if (stack >= end) |
| 166 | continue; | 185 | continue; |
| 186 | /* | ||
| 187 | * Is 'stack' above this exception frame's start address? | ||
| 188 | * If yes then we found the right frame. | ||
| 189 | */ | ||
| 167 | if (stack >= end - EXCEPTION_STKSZ) { | 190 | if (stack >= end - EXCEPTION_STKSZ) { |
| 191 | /* | ||
| 192 | * Make sure we only iterate through an exception | ||
| 193 | * stack once. If it comes up for the second time | ||
| 194 | * then there's something wrong going on - just | ||
| 195 | * break out and return NULL: | ||
| 196 | */ | ||
| 168 | if (*usedp & (1U << k)) | 197 | if (*usedp & (1U << k)) |
| 169 | break; | 198 | break; |
| 170 | *usedp |= 1U << k; | 199 | *usedp |= 1U << k; |
| 171 | *idp = ids[k]; | 200 | *idp = ids[k]; |
| 172 | return (unsigned long *)end; | 201 | return (unsigned long *)end; |
| 173 | } | 202 | } |
| 203 | /* | ||
| 204 | * If this is a debug stack, and if it has a larger size than | ||
| 205 | * the usual exception stacks, then 'stack' might still | ||
| 206 | * be within the lower portion of the debug stack: | ||
| 207 | */ | ||
| 174 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 208 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
| 175 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { | 209 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { |
| 176 | unsigned j = N_EXCEPTION_STACKS - 1; | 210 | unsigned j = N_EXCEPTION_STACKS - 1; |
| 177 | 211 | ||
| 212 | /* | ||
| 213 | * Black magic. A large debug stack is composed of | ||
| 214 | * multiple exception stack entries, which we | ||
| 215 | * iterate through now. Don't look: | ||
| 216 | */ | ||
| 178 | do { | 217 | do { |
| 179 | ++j; | 218 | ++j; |
| 180 | end -= EXCEPTION_STKSZ; | 219 | end -= EXCEPTION_STKSZ; |
| @@ -193,20 +232,14 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
| 193 | 232 | ||
| 194 | static int show_trace_unwind(struct unwind_frame_info *info, void *context) | 233 | static int show_trace_unwind(struct unwind_frame_info *info, void *context) |
| 195 | { | 234 | { |
| 196 | int i = 11, n = 0; | 235 | int n = 0; |
| 197 | 236 | ||
| 198 | while (unwind(info) == 0 && UNW_PC(info)) { | 237 | while (unwind(info) == 0 && UNW_PC(info)) { |
| 199 | ++n; | 238 | n++; |
| 200 | if (i > 50) { | 239 | printk_address(UNW_PC(info)); |
| 201 | printk("\n "); | ||
| 202 | i = 7; | ||
| 203 | } else | ||
| 204 | i += printk(" "); | ||
| 205 | i += printk_address(UNW_PC(info)); | ||
| 206 | if (arch_unw_user_mode(info)) | 240 | if (arch_unw_user_mode(info)) |
| 207 | break; | 241 | break; |
| 208 | } | 242 | } |
| 209 | printk("\n"); | ||
| 210 | return n; | 243 | return n; |
| 211 | } | 244 | } |
| 212 | 245 | ||
| @@ -224,7 +257,7 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
| 224 | int i = 11; | 257 | int i = 11; |
| 225 | unsigned used = 0; | 258 | unsigned used = 0; |
| 226 | 259 | ||
| 227 | printk("\nCall Trace:"); | 260 | printk("\nCall Trace:\n"); |
| 228 | 261 | ||
| 229 | if (!tsk) | 262 | if (!tsk) |
| 230 | tsk = current; | 263 | tsk = current; |
| @@ -250,16 +283,15 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
| 250 | } | 283 | } |
| 251 | } | 284 | } |
| 252 | 285 | ||
| 286 | /* | ||
| 287 | * Print function call entries within a stack. 'cond' is the | ||
| 288 | * "end of stackframe" condition, that the 'stack++' | ||
| 289 | * iteration will eventually trigger. | ||
| 290 | */ | ||
| 253 | #define HANDLE_STACK(cond) \ | 291 | #define HANDLE_STACK(cond) \ |
| 254 | do while (cond) { \ | 292 | do while (cond) { \ |
| 255 | unsigned long addr = *stack++; \ | 293 | unsigned long addr = *stack++; \ |
| 256 | if (kernel_text_address(addr)) { \ | 294 | if (kernel_text_address(addr)) { \ |
| 257 | if (i > 50) { \ | ||
| 258 | printk("\n "); \ | ||
| 259 | i = 0; \ | ||
| 260 | } \ | ||
| 261 | else \ | ||
| 262 | i += printk(" "); \ | ||
| 263 | /* \ | 295 | /* \ |
| 264 | * If the address is either in the text segment of the \ | 296 | * If the address is either in the text segment of the \ |
| 265 | * kernel, or in the region which contains vmalloc'ed \ | 297 | * kernel, or in the region which contains vmalloc'ed \ |
| @@ -268,20 +300,30 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
| 268 | * down the cause of the crash will be able to figure \ | 300 | * down the cause of the crash will be able to figure \ |
| 269 | * out the call path that was taken. \ | 301 | * out the call path that was taken. \ |
| 270 | */ \ | 302 | */ \ |
| 271 | i += printk_address(addr); \ | 303 | printk_address(addr); \ |
| 272 | } \ | 304 | } \ |
| 273 | } while (0) | 305 | } while (0) |
| 274 | 306 | ||
| 275 | for(; ; ) { | 307 | /* |
| 308 | * Print function call entries in all stacks, starting at the | ||
| 309 | * current stack address. If the stacks consist of nested | ||
| 310 | * exceptions, follow the link from each stack to the next. | ||
| 311 | */ | ||
| 312 | for ( ; ; ) { | ||
| 276 | const char *id; | 313 | const char *id; |
| 277 | unsigned long *estack_end; | 314 | unsigned long *estack_end; |
| 278 | estack_end = in_exception_stack(cpu, (unsigned long)stack, | 315 | estack_end = in_exception_stack(cpu, (unsigned long)stack, |
| 279 | &used, &id); | 316 | &used, &id); |
| 280 | 317 | ||
| 281 | if (estack_end) { | 318 | if (estack_end) { |
| 282 | i += printk(" <%s>", id); | 319 | printk(" <%s>", id); |
| 283 | HANDLE_STACK (stack < estack_end); | 320 | HANDLE_STACK (stack < estack_end); |
| 284 | i += printk(" <EOE>"); | 321 | printk(" <EOE>"); |
| 322 | /* | ||
| 323 | * We link to the next stack via the | ||
| 324 | * second-to-last pointer (index -2 to end) in the | ||
| 325 | * exception stack: | ||
| 326 | */ | ||
| 285 | stack = (unsigned long *) estack_end[-2]; | 327 | stack = (unsigned long *) estack_end[-2]; |
| 286 | continue; | 328 | continue; |
| 287 | } | 329 | } |
| @@ -291,19 +333,28 @@ void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s | |||
| 291 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); | 333 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); |
| 292 | 334 | ||
| 293 | if (stack >= irqstack && stack < irqstack_end) { | 335 | if (stack >= irqstack && stack < irqstack_end) { |
| 294 | i += printk(" <IRQ>"); | 336 | printk(" <IRQ>"); |
| 295 | HANDLE_STACK (stack < irqstack_end); | 337 | HANDLE_STACK (stack < irqstack_end); |
| 338 | /* | ||
| 339 | * We link to the next stack (which would be | ||
| 340 | * the process stack normally) via the last | ||
| 341 | * pointer (index -1 to end) in the IRQ stack: | ||
| 342 | */ | ||
| 296 | stack = (unsigned long *) (irqstack_end[-1]); | 343 | stack = (unsigned long *) (irqstack_end[-1]); |
| 297 | irqstack_end = NULL; | 344 | irqstack_end = NULL; |
| 298 | i += printk(" <EOI>"); | 345 | printk(" <EOI>"); |
| 299 | continue; | 346 | continue; |
| 300 | } | 347 | } |
| 301 | } | 348 | } |
| 302 | break; | 349 | break; |
| 303 | } | 350 | } |
| 304 | 351 | ||
| 352 | /* | ||
| 353 | * This prints the process stack: | ||
| 354 | */ | ||
| 305 | HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); | 355 | HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); |
| 306 | #undef HANDLE_STACK | 356 | #undef HANDLE_STACK |
| 357 | |||
| 307 | printk("\n"); | 358 | printk("\n"); |
| 308 | } | 359 | } |
| 309 | 360 | ||
| @@ -337,8 +388,8 @@ static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned | |||
| 337 | break; | 388 | break; |
| 338 | } | 389 | } |
| 339 | if (i && ((i % 4) == 0)) | 390 | if (i && ((i % 4) == 0)) |
| 340 | printk("\n "); | 391 | printk("\n"); |
| 341 | printk("%016lx ", *stack++); | 392 | printk(" %016lx", *stack++); |
| 342 | touch_nmi_watchdog(); | 393 | touch_nmi_watchdog(); |
| 343 | } | 394 | } |
| 344 | show_trace(tsk, regs, rsp); | 395 | show_trace(tsk, regs, rsp); |
