aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Morton <akpm@osdl.org>2006-12-06 20:14:01 -0500
committerAndi Kleen <andi@basil.nowhere.org>2006-12-06 20:14:01 -0500
commitbb81a09e55eaf7e5f798468ab971469b6f66a259 (patch)
treecf1ed6b0ad75137361228955535044fd4630a57b
parente5e3a0428968dcc1f9318ce1c941a918e99f8b84 (diff)
[PATCH] x86: all cpu backtrace
When a spinlock lockup occurs, arrange for the NMI code to emit an all-cpu backtrace, so we get to see which CPU is holding the lock, and where. Cc: Andi Kleen <ak@muc.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Badari Pulavarty <pbadari@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Andi Kleen <ak@suse.de>
-rw-r--r--arch/i386/kernel/nmi.c26
-rw-r--r--arch/x86_64/kernel/nmi.c29
-rw-r--r--include/asm-i386/nmi.h8
-rw-r--r--include/asm-x86_64/nmi.h3
-rw-r--r--include/linux/nmi.h5
-rw-r--r--lib/spinlock_debug.c4
6 files changed, 74 insertions, 1 deletions
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index eaafe233a5da..171194ccb7bc 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -22,6 +22,7 @@
22#include <linux/percpu.h> 22#include <linux/percpu.h>
23#include <linux/dmi.h> 23#include <linux/dmi.h>
24#include <linux/kprobes.h> 24#include <linux/kprobes.h>
25#include <linux/cpumask.h>
25 26
26#include <asm/smp.h> 27#include <asm/smp.h>
27#include <asm/nmi.h> 28#include <asm/nmi.h>
@@ -42,6 +43,8 @@ int nmi_watchdog_enabled;
42static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); 43static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
43static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); 44static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
44 45
46static cpumask_t backtrace_mask = CPU_MASK_NONE;
47
45/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's 48/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
46 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) 49 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
47 */ 50 */
@@ -907,6 +910,16 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
907 touched = 1; 910 touched = 1;
908 } 911 }
909 912
913 if (cpu_isset(cpu, backtrace_mask)) {
914 static DEFINE_SPINLOCK(lock); /* Serialise the printks */
915
916 spin_lock(&lock);
917 printk("NMI backtrace for cpu %d\n", cpu);
918 dump_stack();
919 spin_unlock(&lock);
920 cpu_clear(cpu, backtrace_mask);
921 }
922
910 sum = per_cpu(irq_stat, cpu).apic_timer_irqs; 923 sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
911 924
912 /* if the apic timer isn't firing, this cpu isn't doing much */ 925 /* if the apic timer isn't firing, this cpu isn't doing much */
@@ -1033,6 +1046,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
1033 1046
1034#endif 1047#endif
1035 1048
1049void __trigger_all_cpu_backtrace(void)
1050{
1051 int i;
1052
1053 backtrace_mask = cpu_online_map;
1054 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
1055 for (i = 0; i < 10 * 1000; i++) {
1056 if (cpus_empty(backtrace_mask))
1057 break;
1058 mdelay(1);
1059 }
1060}
1061
1036EXPORT_SYMBOL(nmi_active); 1062EXPORT_SYMBOL(nmi_active);
1037EXPORT_SYMBOL(nmi_watchdog); 1063EXPORT_SYMBOL(nmi_watchdog);
1038EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); 1064EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 7af9cb3e2d99..27e95e7922c1 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -12,14 +12,15 @@
12 * Mikael Pettersson : PM converted to driver model. Disable/enable API. 12 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
13 */ 13 */
14 14
15#include <linux/nmi.h>
15#include <linux/mm.h> 16#include <linux/mm.h>
16#include <linux/delay.h> 17#include <linux/delay.h>
17#include <linux/interrupt.h> 18#include <linux/interrupt.h>
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/sysdev.h> 20#include <linux/sysdev.h>
20#include <linux/nmi.h>
21#include <linux/sysctl.h> 21#include <linux/sysctl.h>
22#include <linux/kprobes.h> 22#include <linux/kprobes.h>
23#include <linux/cpumask.h>
23 24
24#include <asm/smp.h> 25#include <asm/smp.h>
25#include <asm/nmi.h> 26#include <asm/nmi.h>
@@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi;
41static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); 42static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner);
42static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); 43static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]);
43 44
45static cpumask_t backtrace_mask = CPU_MASK_NONE;
46
44/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's 47/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
45 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) 48 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
46 */ 49 */
@@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
782{ 785{
783 int sum; 786 int sum;
784 int touched = 0; 787 int touched = 0;
788 int cpu = smp_processor_id();
785 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 789 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
786 u64 dummy; 790 u64 dummy;
787 int rc=0; 791 int rc=0;
@@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
799 touched = 1; 803 touched = 1;
800 } 804 }
801 805
806 if (cpu_isset(cpu, backtrace_mask)) {
807 static DEFINE_SPINLOCK(lock); /* Serialise the printks */
808
809 spin_lock(&lock);
810 printk("NMI backtrace for cpu %d\n", cpu);
811 dump_stack();
812 spin_unlock(&lock);
813 cpu_clear(cpu, backtrace_mask);
814 }
815
802#ifdef CONFIG_X86_MCE 816#ifdef CONFIG_X86_MCE
803 /* Could check oops_in_progress here too, but it's safer 817 /* Could check oops_in_progress here too, but it's safer
804 not too */ 818 not too */
@@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
931 945
932#endif 946#endif
933 947
948void __trigger_all_cpu_backtrace(void)
949{
950 int i;
951
952 backtrace_mask = cpu_online_map;
953 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
954 for (i = 0; i < 10 * 1000; i++) {
955 if (cpus_empty(backtrace_mask))
956 break;
957 mdelay(1);
958 }
959}
960
934EXPORT_SYMBOL(nmi_active); 961EXPORT_SYMBOL(nmi_active);
935EXPORT_SYMBOL(nmi_watchdog); 962EXPORT_SYMBOL(nmi_watchdog);
936EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); 963EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h
index 269d315719ca..b04333ea6f31 100644
--- a/include/asm-i386/nmi.h
+++ b/include/asm-i386/nmi.h
@@ -5,6 +5,9 @@
5#define ASM_NMI_H 5#define ASM_NMI_H
6 6
7#include <linux/pm.h> 7#include <linux/pm.h>
8#include <asm/irq.h>
9
10#ifdef ARCH_HAS_NMI_WATCHDOG
8 11
9/** 12/**
10 * do_nmi_callback 13 * do_nmi_callback
@@ -42,4 +45,9 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
42 void __user *, size_t *, loff_t *); 45 void __user *, size_t *, loff_t *);
43extern int unknown_nmi_panic; 46extern int unknown_nmi_panic;
44 47
48void __trigger_all_cpu_backtrace(void);
49#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
50
51#endif
52
45#endif /* ASM_NMI_H */ 53#endif /* ASM_NMI_H */
diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h
index f367d4014b42..72375e7d32a8 100644
--- a/include/asm-x86_64/nmi.h
+++ b/include/asm-x86_64/nmi.h
@@ -77,4 +77,7 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
77 77
78extern int unknown_nmi_panic; 78extern int unknown_nmi_panic;
79 79
80void __trigger_all_cpu_backtrace(void);
81#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
82
80#endif /* ASM_NMI_H */ 83#endif /* ASM_NMI_H */
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index e16904e28c3a..acb4ed130247 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -15,9 +15,14 @@
15 * disables interrupts for a long time. This call is stateless. 15 * disables interrupts for a long time. This call is stateless.
16 */ 16 */
17#ifdef ARCH_HAS_NMI_WATCHDOG 17#ifdef ARCH_HAS_NMI_WATCHDOG
18#include <asm/nmi.h>
18extern void touch_nmi_watchdog(void); 19extern void touch_nmi_watchdog(void);
19#else 20#else
20# define touch_nmi_watchdog() touch_softlockup_watchdog() 21# define touch_nmi_watchdog() touch_softlockup_watchdog()
21#endif 22#endif
22 23
24#ifndef trigger_all_cpu_backtrace
25#define trigger_all_cpu_backtrace() do { } while (0)
26#endif
27
23#endif 28#endif
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index b6c4f898197c..479fd462eaa9 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -7,6 +7,7 @@
7 */ 7 */
8 8
9#include <linux/spinlock.h> 9#include <linux/spinlock.h>
10#include <linux/nmi.h>
10#include <linux/interrupt.h> 11#include <linux/interrupt.h>
11#include <linux/debug_locks.h> 12#include <linux/debug_locks.h>
12#include <linux/delay.h> 13#include <linux/delay.h>
@@ -117,6 +118,9 @@ static void __spin_lock_debug(spinlock_t *lock)
117 raw_smp_processor_id(), current->comm, 118 raw_smp_processor_id(), current->comm,
118 current->pid, lock); 119 current->pid, lock);
119 dump_stack(); 120 dump_stack();
121#ifdef CONFIG_SMP
122 trigger_all_cpu_backtrace();
123#endif
120 } 124 }
121 } 125 }
122} 126}