diff options
author | Andrew Morton <akpm@osdl.org> | 2006-12-06 20:14:01 -0500 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2006-12-06 20:14:01 -0500 |
commit | bb81a09e55eaf7e5f798468ab971469b6f66a259 (patch) | |
tree | cf1ed6b0ad75137361228955535044fd4630a57b | |
parent | e5e3a0428968dcc1f9318ce1c941a918e99f8b84 (diff) |
[PATCH] x86: all cpu backtrace
When a spinlock lockup occurs, arrange for the NMI code to emit an all-cpu
backtrace, so we get to see which CPU is holding the lock, and where.
Cc: Andi Kleen <ak@muc.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
-rw-r--r-- | arch/i386/kernel/nmi.c | 26 | ||||
-rw-r--r-- | arch/x86_64/kernel/nmi.c | 29 | ||||
-rw-r--r-- | include/asm-i386/nmi.h | 8 | ||||
-rw-r--r-- | include/asm-x86_64/nmi.h | 3 | ||||
-rw-r--r-- | include/linux/nmi.h | 5 | ||||
-rw-r--r-- | lib/spinlock_debug.c | 4 |
6 files changed, 74 insertions, 1 deletions
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index eaafe233a5da..171194ccb7bc 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/percpu.h> | 22 | #include <linux/percpu.h> |
23 | #include <linux/dmi.h> | 23 | #include <linux/dmi.h> |
24 | #include <linux/kprobes.h> | 24 | #include <linux/kprobes.h> |
25 | #include <linux/cpumask.h> | ||
25 | 26 | ||
26 | #include <asm/smp.h> | 27 | #include <asm/smp.h> |
27 | #include <asm/nmi.h> | 28 | #include <asm/nmi.h> |
@@ -42,6 +43,8 @@ int nmi_watchdog_enabled; | |||
42 | static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); | 43 | static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); |
43 | static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); | 44 | static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); |
44 | 45 | ||
46 | static cpumask_t backtrace_mask = CPU_MASK_NONE; | ||
47 | |||
45 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its | 48 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its |
46 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) | 49 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) |
47 | */ | 50 | */ |
@@ -907,6 +910,16 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
907 | touched = 1; | 910 | touched = 1; |
908 | } | 911 | } |
909 | 912 | ||
913 | if (cpu_isset(cpu, backtrace_mask)) { | ||
914 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | ||
915 | |||
916 | spin_lock(&lock); | ||
917 | printk("NMI backtrace for cpu %d\n", cpu); | ||
918 | dump_stack(); | ||
919 | spin_unlock(&lock); | ||
920 | cpu_clear(cpu, backtrace_mask); | ||
921 | } | ||
922 | |||
910 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; | 923 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; |
911 | 924 | ||
912 | /* if the apic timer isn't firing, this cpu isn't doing much */ | 925 | /* if the apic timer isn't firing, this cpu isn't doing much */ |
@@ -1033,6 +1046,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, | |||
1033 | 1046 | ||
1034 | #endif | 1047 | #endif |
1035 | 1048 | ||
1049 | void __trigger_all_cpu_backtrace(void) | ||
1050 | { | ||
1051 | int i; | ||
1052 | |||
1053 | backtrace_mask = cpu_online_map; | ||
1054 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | ||
1055 | for (i = 0; i < 10 * 1000; i++) { | ||
1056 | if (cpus_empty(backtrace_mask)) | ||
1057 | break; | ||
1058 | mdelay(1); | ||
1059 | } | ||
1060 | } | ||
1061 | |||
1036 | EXPORT_SYMBOL(nmi_active); | 1062 | EXPORT_SYMBOL(nmi_active); |
1037 | EXPORT_SYMBOL(nmi_watchdog); | 1063 | EXPORT_SYMBOL(nmi_watchdog); |
1038 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); | 1064 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); |
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 7af9cb3e2d99..27e95e7922c1 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -12,14 +12,15 @@ | |||
12 | * Mikael Pettersson : PM converted to driver model. Disable/enable API. | 12 | * Mikael Pettersson : PM converted to driver model. Disable/enable API. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/nmi.h> | ||
15 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
16 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
17 | #include <linux/interrupt.h> | 18 | #include <linux/interrupt.h> |
18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
19 | #include <linux/sysdev.h> | 20 | #include <linux/sysdev.h> |
20 | #include <linux/nmi.h> | ||
21 | #include <linux/sysctl.h> | 21 | #include <linux/sysctl.h> |
22 | #include <linux/kprobes.h> | 22 | #include <linux/kprobes.h> |
23 | #include <linux/cpumask.h> | ||
23 | 24 | ||
24 | #include <asm/smp.h> | 25 | #include <asm/smp.h> |
25 | #include <asm/nmi.h> | 26 | #include <asm/nmi.h> |
@@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi; | |||
41 | static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); | 42 | static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); |
42 | static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); | 43 | static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); |
43 | 44 | ||
45 | static cpumask_t backtrace_mask = CPU_MASK_NONE; | ||
46 | |||
44 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its | 47 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its |
45 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) | 48 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) |
46 | */ | 49 | */ |
@@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
782 | { | 785 | { |
783 | int sum; | 786 | int sum; |
784 | int touched = 0; | 787 | int touched = 0; |
788 | int cpu = smp_processor_id(); | ||
785 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 789 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
786 | u64 dummy; | 790 | u64 dummy; |
787 | int rc=0; | 791 | int rc=0; |
@@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
799 | touched = 1; | 803 | touched = 1; |
800 | } | 804 | } |
801 | 805 | ||
806 | if (cpu_isset(cpu, backtrace_mask)) { | ||
807 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | ||
808 | |||
809 | spin_lock(&lock); | ||
810 | printk("NMI backtrace for cpu %d\n", cpu); | ||
811 | dump_stack(); | ||
812 | spin_unlock(&lock); | ||
813 | cpu_clear(cpu, backtrace_mask); | ||
814 | } | ||
815 | |||
802 | #ifdef CONFIG_X86_MCE | 816 | #ifdef CONFIG_X86_MCE |
803 | /* Could check oops_in_progress here too, but it's safer | 817 | /* Could check oops_in_progress here too, but it's safer |
804 | not to */ | 818 | not to */ |
@@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, | |||
931 | 945 | ||
932 | #endif | 946 | #endif |
933 | 947 | ||
948 | void __trigger_all_cpu_backtrace(void) | ||
949 | { | ||
950 | int i; | ||
951 | |||
952 | backtrace_mask = cpu_online_map; | ||
953 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | ||
954 | for (i = 0; i < 10 * 1000; i++) { | ||
955 | if (cpus_empty(backtrace_mask)) | ||
956 | break; | ||
957 | mdelay(1); | ||
958 | } | ||
959 | } | ||
960 | |||
934 | EXPORT_SYMBOL(nmi_active); | 961 | EXPORT_SYMBOL(nmi_active); |
935 | EXPORT_SYMBOL(nmi_watchdog); | 962 | EXPORT_SYMBOL(nmi_watchdog); |
936 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); | 963 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); |
diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index 269d315719ca..b04333ea6f31 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h | |||
@@ -5,6 +5,9 @@ | |||
5 | #define ASM_NMI_H | 5 | #define ASM_NMI_H |
6 | 6 | ||
7 | #include <linux/pm.h> | 7 | #include <linux/pm.h> |
8 | #include <asm/irq.h> | ||
9 | |||
10 | #ifdef ARCH_HAS_NMI_WATCHDOG | ||
8 | 11 | ||
9 | /** | 12 | /** |
10 | * do_nmi_callback | 13 | * do_nmi_callback |
@@ -42,4 +45,9 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, | |||
42 | void __user *, size_t *, loff_t *); | 45 | void __user *, size_t *, loff_t *); |
43 | extern int unknown_nmi_panic; | 46 | extern int unknown_nmi_panic; |
44 | 47 | ||
48 | void __trigger_all_cpu_backtrace(void); | ||
49 | #define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() | ||
50 | |||
51 | #endif | ||
52 | |||
45 | #endif /* ASM_NMI_H */ | 53 | #endif /* ASM_NMI_H */ |
diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index f367d4014b42..72375e7d32a8 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h | |||
@@ -77,4 +77,7 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, | |||
77 | 77 | ||
78 | extern int unknown_nmi_panic; | 78 | extern int unknown_nmi_panic; |
79 | 79 | ||
80 | void __trigger_all_cpu_backtrace(void); | ||
81 | #define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() | ||
82 | |||
80 | #endif /* ASM_NMI_H */ | 83 | #endif /* ASM_NMI_H */ |
diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e16904e28c3a..acb4ed130247 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h | |||
@@ -15,9 +15,14 @@ | |||
15 | * disables interrupts for a long time. This call is stateless. | 15 | * disables interrupts for a long time. This call is stateless. |
16 | */ | 16 | */ |
17 | #ifdef ARCH_HAS_NMI_WATCHDOG | 17 | #ifdef ARCH_HAS_NMI_WATCHDOG |
18 | #include <asm/nmi.h> | ||
18 | extern void touch_nmi_watchdog(void); | 19 | extern void touch_nmi_watchdog(void); |
19 | #else | 20 | #else |
20 | # define touch_nmi_watchdog() touch_softlockup_watchdog() | 21 | # define touch_nmi_watchdog() touch_softlockup_watchdog() |
21 | #endif | 22 | #endif |
22 | 23 | ||
24 | #ifndef trigger_all_cpu_backtrace | ||
25 | #define trigger_all_cpu_backtrace() do { } while (0) | ||
26 | #endif | ||
27 | |||
23 | #endif | 28 | #endif |
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index b6c4f898197c..479fd462eaa9 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c | |||
@@ -7,6 +7,7 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/spinlock.h> | 9 | #include <linux/spinlock.h> |
10 | #include <linux/nmi.h> | ||
10 | #include <linux/interrupt.h> | 11 | #include <linux/interrupt.h> |
11 | #include <linux/debug_locks.h> | 12 | #include <linux/debug_locks.h> |
12 | #include <linux/delay.h> | 13 | #include <linux/delay.h> |
@@ -117,6 +118,9 @@ static void __spin_lock_debug(spinlock_t *lock) | |||
117 | raw_smp_processor_id(), current->comm, | 118 | raw_smp_processor_id(), current->comm, |
118 | current->pid, lock); | 119 | current->pid, lock); |
119 | dump_stack(); | 120 | dump_stack(); |
121 | #ifdef CONFIG_SMP | ||
122 | trigger_all_cpu_backtrace(); | ||
123 | #endif | ||
120 | } | 124 | } |
121 | } | 125 | } |
122 | } | 126 | } |