diff options
| author | Andrew Morton <akpm@osdl.org> | 2006-12-06 20:14:01 -0500 |
|---|---|---|
| committer | Andi Kleen <andi@basil.nowhere.org> | 2006-12-06 20:14:01 -0500 |
| commit | bb81a09e55eaf7e5f798468ab971469b6f66a259 (patch) | |
| tree | cf1ed6b0ad75137361228955535044fd4630a57b | |
| parent | e5e3a0428968dcc1f9318ce1c941a918e99f8b84 (diff) | |
[PATCH] x86: all cpu backtrace
When a spinlock lockup occurs, arrange for the NMI code to emit an all-cpu
backtrace, so we get to see which CPU is holding the lock, and where.
Cc: Andi Kleen <ak@muc.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
| -rw-r--r-- | arch/i386/kernel/nmi.c | 26 | ||||
| -rw-r--r-- | arch/x86_64/kernel/nmi.c | 29 | ||||
| -rw-r--r-- | include/asm-i386/nmi.h | 8 | ||||
| -rw-r--r-- | include/asm-x86_64/nmi.h | 3 | ||||
| -rw-r--r-- | include/linux/nmi.h | 5 | ||||
| -rw-r--r-- | lib/spinlock_debug.c | 4 |
6 files changed, 74 insertions, 1 deletion
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index eaafe233a5da..171194ccb7bc 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/percpu.h> | 22 | #include <linux/percpu.h> |
| 23 | #include <linux/dmi.h> | 23 | #include <linux/dmi.h> |
| 24 | #include <linux/kprobes.h> | 24 | #include <linux/kprobes.h> |
| 25 | #include <linux/cpumask.h> | ||
| 25 | 26 | ||
| 26 | #include <asm/smp.h> | 27 | #include <asm/smp.h> |
| 27 | #include <asm/nmi.h> | 28 | #include <asm/nmi.h> |
| @@ -42,6 +43,8 @@ int nmi_watchdog_enabled; | |||
| 42 | static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); | 43 | static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); |
| 43 | static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); | 44 | static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); |
| 44 | 45 | ||
| 46 | static cpumask_t backtrace_mask = CPU_MASK_NONE; | ||
| 47 | |||
| 45 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its | 48 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its |
| 46 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) | 49 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) |
| 47 | */ | 50 | */ |
| @@ -907,6 +910,16 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
| 907 | touched = 1; | 910 | touched = 1; |
| 908 | } | 911 | } |
| 909 | 912 | ||
| 913 | if (cpu_isset(cpu, backtrace_mask)) { | ||
| 914 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | ||
| 915 | |||
| 916 | spin_lock(&lock); | ||
| 917 | printk("NMI backtrace for cpu %d\n", cpu); | ||
| 918 | dump_stack(); | ||
| 919 | spin_unlock(&lock); | ||
| 920 | cpu_clear(cpu, backtrace_mask); | ||
| 921 | } | ||
| 922 | |||
| 910 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; | 923 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; |
| 911 | 924 | ||
| 912 | /* if the apic timer isn't firing, this cpu isn't doing much */ | 925 | /* if the apic timer isn't firing, this cpu isn't doing much */ |
| @@ -1033,6 +1046,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, | |||
| 1033 | 1046 | ||
| 1034 | #endif | 1047 | #endif |
| 1035 | 1048 | ||
| 1049 | void __trigger_all_cpu_backtrace(void) | ||
| 1050 | { | ||
| 1051 | int i; | ||
| 1052 | |||
| 1053 | backtrace_mask = cpu_online_map; | ||
| 1054 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | ||
| 1055 | for (i = 0; i < 10 * 1000; i++) { | ||
| 1056 | if (cpus_empty(backtrace_mask)) | ||
| 1057 | break; | ||
| 1058 | mdelay(1); | ||
| 1059 | } | ||
| 1060 | } | ||
| 1061 | |||
| 1036 | EXPORT_SYMBOL(nmi_active); | 1062 | EXPORT_SYMBOL(nmi_active); |
| 1037 | EXPORT_SYMBOL(nmi_watchdog); | 1063 | EXPORT_SYMBOL(nmi_watchdog); |
| 1038 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); | 1064 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); |
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 7af9cb3e2d99..27e95e7922c1 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
| @@ -12,14 +12,15 @@ | |||
| 12 | * Mikael Pettersson : PM converted to driver model. Disable/enable API. | 12 | * Mikael Pettersson : PM converted to driver model. Disable/enable API. |
| 13 | */ | 13 | */ |
| 14 | 14 | ||
| 15 | #include <linux/nmi.h> | ||
| 15 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
| 16 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
| 17 | #include <linux/interrupt.h> | 18 | #include <linux/interrupt.h> |
| 18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
| 19 | #include <linux/sysdev.h> | 20 | #include <linux/sysdev.h> |
| 20 | #include <linux/nmi.h> | ||
| 21 | #include <linux/sysctl.h> | 21 | #include <linux/sysctl.h> |
| 22 | #include <linux/kprobes.h> | 22 | #include <linux/kprobes.h> |
| 23 | #include <linux/cpumask.h> | ||
| 23 | 24 | ||
| 24 | #include <asm/smp.h> | 25 | #include <asm/smp.h> |
| 25 | #include <asm/nmi.h> | 26 | #include <asm/nmi.h> |
| @@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi; | |||
| 41 | static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); | 42 | static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); |
| 42 | static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); | 43 | static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); |
| 43 | 44 | ||
| 45 | static cpumask_t backtrace_mask = CPU_MASK_NONE; | ||
| 46 | |||
| 44 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its | 47 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its |
| 45 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) | 48 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) |
| 46 | */ | 49 | */ |
| @@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
| 782 | { | 785 | { |
| 783 | int sum; | 786 | int sum; |
| 784 | int touched = 0; | 787 | int touched = 0; |
| 788 | int cpu = smp_processor_id(); | ||
| 785 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 789 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
| 786 | u64 dummy; | 790 | u64 dummy; |
| 787 | int rc=0; | 791 | int rc=0; |
| @@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
| 799 | touched = 1; | 803 | touched = 1; |
| 800 | } | 804 | } |
| 801 | 805 | ||
| 806 | if (cpu_isset(cpu, backtrace_mask)) { | ||
| 807 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | ||
| 808 | |||
| 809 | spin_lock(&lock); | ||
| 810 | printk("NMI backtrace for cpu %d\n", cpu); | ||
| 811 | dump_stack(); | ||
| 812 | spin_unlock(&lock); | ||
| 813 | cpu_clear(cpu, backtrace_mask); | ||
| 814 | } | ||
| 815 | |||
| 802 | #ifdef CONFIG_X86_MCE | 816 | #ifdef CONFIG_X86_MCE |
| 803 | /* Could check oops_in_progress here too, but it's safer | 817 | /* Could check oops_in_progress here too, but it's safer |
| 804 | not to */ | 818 | not to */ |
| @@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, | |||
| 931 | 945 | ||
| 932 | #endif | 946 | #endif |
| 933 | 947 | ||
| 948 | void __trigger_all_cpu_backtrace(void) | ||
| 949 | { | ||
| 950 | int i; | ||
| 951 | |||
| 952 | backtrace_mask = cpu_online_map; | ||
| 953 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | ||
| 954 | for (i = 0; i < 10 * 1000; i++) { | ||
| 955 | if (cpus_empty(backtrace_mask)) | ||
| 956 | break; | ||
| 957 | mdelay(1); | ||
| 958 | } | ||
| 959 | } | ||
| 960 | |||
| 934 | EXPORT_SYMBOL(nmi_active); | 961 | EXPORT_SYMBOL(nmi_active); |
| 935 | EXPORT_SYMBOL(nmi_watchdog); | 962 | EXPORT_SYMBOL(nmi_watchdog); |
| 936 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); | 963 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); |
diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index 269d315719ca..b04333ea6f31 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h | |||
| @@ -5,6 +5,9 @@ | |||
| 5 | #define ASM_NMI_H | 5 | #define ASM_NMI_H |
| 6 | 6 | ||
| 7 | #include <linux/pm.h> | 7 | #include <linux/pm.h> |
| 8 | #include <asm/irq.h> | ||
| 9 | |||
| 10 | #ifdef ARCH_HAS_NMI_WATCHDOG | ||
| 8 | 11 | ||
| 9 | /** | 12 | /** |
| 10 | * do_nmi_callback | 13 | * do_nmi_callback |
| @@ -42,4 +45,9 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, | |||
| 42 | void __user *, size_t *, loff_t *); | 45 | void __user *, size_t *, loff_t *); |
| 43 | extern int unknown_nmi_panic; | 46 | extern int unknown_nmi_panic; |
| 44 | 47 | ||
| 48 | void __trigger_all_cpu_backtrace(void); | ||
| 49 | #define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() | ||
| 50 | |||
| 51 | #endif | ||
| 52 | |||
| 45 | #endif /* ASM_NMI_H */ | 53 | #endif /* ASM_NMI_H */ |
diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index f367d4014b42..72375e7d32a8 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h | |||
| @@ -77,4 +77,7 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, | |||
| 77 | 77 | ||
| 78 | extern int unknown_nmi_panic; | 78 | extern int unknown_nmi_panic; |
| 79 | 79 | ||
| 80 | void __trigger_all_cpu_backtrace(void); | ||
| 81 | #define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() | ||
| 82 | |||
| 80 | #endif /* ASM_NMI_H */ | 83 | #endif /* ASM_NMI_H */ |
diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e16904e28c3a..acb4ed130247 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h | |||
| @@ -15,9 +15,14 @@ | |||
| 15 | * disables interrupts for a long time. This call is stateless. | 15 | * disables interrupts for a long time. This call is stateless. |
| 16 | */ | 16 | */ |
| 17 | #ifdef ARCH_HAS_NMI_WATCHDOG | 17 | #ifdef ARCH_HAS_NMI_WATCHDOG |
| 18 | #include <asm/nmi.h> | ||
| 18 | extern void touch_nmi_watchdog(void); | 19 | extern void touch_nmi_watchdog(void); |
| 19 | #else | 20 | #else |
| 20 | # define touch_nmi_watchdog() touch_softlockup_watchdog() | 21 | # define touch_nmi_watchdog() touch_softlockup_watchdog() |
| 21 | #endif | 22 | #endif |
| 22 | 23 | ||
| 24 | #ifndef trigger_all_cpu_backtrace | ||
| 25 | #define trigger_all_cpu_backtrace() do { } while (0) | ||
| 26 | #endif | ||
| 27 | |||
| 23 | #endif | 28 | #endif |
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index b6c4f898197c..479fd462eaa9 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #include <linux/spinlock.h> | 9 | #include <linux/spinlock.h> |
| 10 | #include <linux/nmi.h> | ||
| 10 | #include <linux/interrupt.h> | 11 | #include <linux/interrupt.h> |
| 11 | #include <linux/debug_locks.h> | 12 | #include <linux/debug_locks.h> |
| 12 | #include <linux/delay.h> | 13 | #include <linux/delay.h> |
| @@ -117,6 +118,9 @@ static void __spin_lock_debug(spinlock_t *lock) | |||
| 117 | raw_smp_processor_id(), current->comm, | 118 | raw_smp_processor_id(), current->comm, |
| 118 | current->pid, lock); | 119 | current->pid, lock); |
| 119 | dump_stack(); | 120 | dump_stack(); |
| 121 | #ifdef CONFIG_SMP | ||
| 122 | trigger_all_cpu_backtrace(); | ||
| 123 | #endif | ||
| 120 | } | 124 | } |
| 121 | } | 125 | } |
| 122 | } | 126 | } |
