 arch/i386/kernel/nmi.c | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 72515b8a1b12..d661703ac1cb 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -100,16 +100,44 @@ int nmi_active;
 	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
 	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
 
+#ifdef CONFIG_SMP
+/* The performance counters used by NMI_LOCAL_APIC don't trigger when
+ * the CPU is idle. To make sure the NMI watchdog really ticks on all
+ * CPUs during the test make them busy.
+ */
+static __init void nmi_cpu_busy(void *data)
+{
+	volatile int *endflag = data;
+	local_irq_enable();
+	/* Intentionally don't use cpu_relax here. This is
+	   to make sure that the performance counter really ticks,
+	   even if there is a simulator or similar that catches the
+	   pause instruction. On a real HT machine this is fine because
+	   all other CPUs are busy with "useless" delay loops and don't
+	   care if they get somewhat less cycles. */
+	while (*endflag == 0)
+		barrier();
+}
+#endif
+
 static int __init check_nmi_watchdog(void)
 {
-	unsigned int prev_nmi_count[NR_CPUS];
+	volatile int endflag = 0;
+	unsigned int *prev_nmi_count;
 	int cpu;
 
 	if (nmi_watchdog == NMI_NONE)
 		return 0;
 
+	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
+	if (!prev_nmi_count)
+		return -1;
+
 	printk(KERN_INFO "Testing NMI watchdog ... ");
 
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+
 	for (cpu = 0; cpu < NR_CPUS; cpu++)
 		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
 	local_irq_enable();
@@ -123,12 +151,18 @@ static int __init check_nmi_watchdog(void)
 			continue;
 #endif
 		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
-			printk("CPU#%d: NMI appears to be stuck!\n", cpu);
+			endflag = 1;
+			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
+				cpu,
+				prev_nmi_count[cpu],
+				nmi_count(cpu));
 			nmi_active = 0;
 			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
+			kfree(prev_nmi_count);
 			return -1;
 		}
 	}
+	endflag = 1;
 	printk("OK.\n");
 
 	/* now that we know it works we can reduce NMI frequency to
@@ -136,6 +170,7 @@ static int __init check_nmi_watchdog(void)
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		nmi_hz = 1;
 
+	kfree(prev_nmi_count);
 	return 0;
 }
 /* This needs to happen later in boot so counters are working */
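
The pattern at the heart of this patch is a simple volatile-flag handshake: every other CPU spins in nmi_cpu_busy() so the performance counters keep ticking during the test, and check_nmi_watchdog() releases them by setting endflag on every exit path, success or failure. Below is a minimal user-space sketch of that handshake, assuming POSIX threads; cpu_busy(), the worker thread, and the sleep(1) sampling window are illustrative stand-ins of my own and not part of the kernel patch, which uses smp_call_function() and barrier() instead.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static volatile int endflag = 0;

/* Analogue of nmi_cpu_busy(): burn cycles until the controller says stop.
 * As in the patch, no pause/cpu_relax is issued, so the core stays
 * genuinely busy for the whole window. */
static void *cpu_busy(void *unused)
{
	(void)unused;
	while (endflag == 0)
		;		/* spin; the kernel version uses barrier() here */
	return NULL;
}

int main(void)
{
	pthread_t worker;

	if (pthread_create(&worker, NULL, cpu_busy, NULL) != 0)
		return 1;

	sleep(1);	/* stand-in for the watchdog sampling window */

	endflag = 1;	/* release the spinner on every exit path */
	pthread_join(worker, NULL);
	puts("OK.");
	return 0;
}

Compile with cc -pthread. A volatile flag is enough for this demo, and it matches the patch, where barrier() keeps the compiler from caching the load; new user-space code would normally use C11 atomics for the same handshake.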