 arch/i386/kernel/nmi.c | 39 +++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 72515b8a1b12..d661703ac1cb 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -100,16 +100,44 @@ int nmi_active;
 	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
 	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
 
+#ifdef CONFIG_SMP
+/* The performance counters used by NMI_LOCAL_APIC don't trigger when
+ * the CPU is idle. To make sure the NMI watchdog really ticks on all
+ * CPUs during the test make them busy.
+ */
+static __init void nmi_cpu_busy(void *data)
+{
+	volatile int *endflag = data;
+	local_irq_enable();
+	/* Intentionally don't use cpu_relax here. This is
+	   to make sure that the performance counter really ticks,
+	   even if there is a simulator or similar that catches the
+	   pause instruction. On a real HT machine this is fine because
+	   all other CPUs are busy with "useless" delay loops and don't
+	   care if they get somewhat less cycles. */
+	while (*endflag == 0)
+		barrier();
+}
+#endif
+
 static int __init check_nmi_watchdog(void)
 {
-	unsigned int prev_nmi_count[NR_CPUS];
+	volatile int endflag = 0;
+	unsigned int *prev_nmi_count;
 	int cpu;
 
 	if (nmi_watchdog == NMI_NONE)
 		return 0;
 
+	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
+	if (!prev_nmi_count)
+		return -1;
+
 	printk(KERN_INFO "Testing NMI watchdog ... ");
 
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+
 	for (cpu = 0; cpu < NR_CPUS; cpu++)
 		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
 	local_irq_enable();
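
The hunk above sets up a simple flag handshake: before sampling the per-CPU NMI counts, the boot CPU sends every other CPU into nmi_cpu_busy(), where each spins on *endflag with only a compiler barrier(), so the loop keeps retiring real instructions (and the performance counter keeps ticking) even on a simulator that would idle away a pause/cpu_relax loop. A minimal userspace sketch of the same handshake, with pthreads standing in for smp_call_function() -- busy_worker and the pthread scaffolding are illustrative, not part of the patch:

    #include <pthread.h>
    #include <stdio.h>

    /* Compiler-only barrier, same idea as the kernel's barrier() macro:
     * forces endflag to be re-read on every pass of the loop. */
    #define barrier() __asm__ volatile("" ::: "memory")

    static volatile int endflag = 0;   /* the tester sets this to release the workers */

    /* Illustrative stand-in for nmi_cpu_busy(): burn cycles until released.
     * Deliberately no pause instruction, mirroring the patch's comment. */
    static void *busy_worker(void *unused)
    {
            (void)unused;
            while (endflag == 0)
                    barrier();
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, busy_worker, NULL);
            /* ... the real code samples NMI counts here while the worker spins ... */
            endflag = 1;               /* release the spinning worker */
            pthread_join(t, NULL);
            puts("worker released");
            return 0;
    }
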
@@ -123,12 +151,18 @@ static int __init check_nmi_watchdog(void)
 			continue;
 #endif
 		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
-			printk("CPU#%d: NMI appears to be stuck!\n", cpu);
+			endflag = 1;
+			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
+				cpu,
+				prev_nmi_count[cpu],
+				nmi_count(cpu));
 			nmi_active = 0;
 			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
+			kfree(prev_nmi_count);
 			return -1;
 		}
 	}
+	endflag = 1;
 	printk("OK.\n");
 
 	/* now that we know it works we can reduce NMI frequency to
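
With prev_nmi_count moved off the stack (an on-stack array of NR_CPUS * sizeof(int) bytes gets uncomfortably large on big-NR_CPUS configurations) and onto the heap, the function takes on the usual obligation to free the buffer on every exit path: this hunk adds a kfree() before the failure return, and the final hunk below adds one before the success return. The endflag = 1 stores serve the same per-path duty -- without them the other CPUs would spin in nmi_cpu_busy() forever. Condensed to its control flow (a sketch, not the full function; stuck is an illustrative placeholder for the real nmi_count() delta test):

    static int __init check_nmi_watchdog(void)
    {
            volatile int endflag = 0;
            unsigned int *prev_nmi_count;
            int stuck = 0;  /* placeholder: set when a CPU's NMI count fails to advance */

            prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
            if (!prev_nmi_count)
                    return -1;              /* nothing allocated yet, nothing to free */

            /* ... start nmi_cpu_busy() on the other CPUs, sample the counters ... */

            if (stuck) {
                    endflag = 1;            /* release the spinning CPUs */
                    kfree(prev_nmi_count);  /* free on the error path */
                    return -1;
            }

            endflag = 1;                    /* release the spinning CPUs */
            kfree(prev_nmi_count);          /* and free on the success path */
            return 0;
    }
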
@@ -136,6 +170,7 @@ static int __init check_nmi_watchdog(void)
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		nmi_hz = 1;
 
+	kfree(prev_nmi_count);
 	return 0;
 }
 /* This needs to happen later in boot so counters are working */
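
One API detail worth noting: smp_call_function() in kernels of this vintage took two trailing integer flags, roughly int smp_call_function(void (*func)(void *), void *info, int retry, int wait) -- a from-memory rendering of the 2.6-era prototype, so check <linux/smp.h> in the tree you are reading. The call in the first hunk passes wait == 0:

    /* Asynchronous cross-call: the boot CPU returns immediately while
     * every other CPU enters nmi_cpu_busy() and spins on endflag. That
     * is the point of the patch -- the tester needs the other CPUs busy
     * *while* it samples their NMI counts, so it must not wait here. */
    smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
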