aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author: Eric W. Biederman <ebiederm@xmission.com> 2005-10-30 17:59:40 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2005-10-30 20:37:13 -0500
commit: 29b70081f7cb094513d5189e82d3478b50777a28 (patch)
tree: d992aa91eeae9ac79265dd0ead0d7cc5a48f31f1 /arch
parent: fcfd636a728fe2b8fb8c8fd8c557302059580577 (diff)
[PATCH] i386 nmi_watchdog: Merge check_nmi_watchdog fixes from x86_64
The per cpu nmi watchdog timer is based on an event counter. Idle cpus don't generate events, so the NMI watchdog doesn't fire and the test to see if the watchdog is working fails.

- Add nmi_cpu_busy so idle cpus don't mess up the test.
- kmalloc prev_nmi_count to keep kernel stack usage bounded.
- Improve the error message on failure so there is enough information to debug problems.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r-- arch/i386/kernel/nmi.c 39
1 files changed, 37 insertions, 2 deletions
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 72515b8a1b1..d661703ac1c 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -100,16 +100,44 @@ int nmi_active;
100 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ 100 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
101 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) 101 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
102 102
103#ifdef CONFIG_SMP
104/* The performance counters used by NMI_LOCAL_APIC don't trigger when
105 * the CPU is idle. To make sure the NMI watchdog really ticks on all
106 * CPUs during the test make them busy.
107 */
108static __init void nmi_cpu_busy(void *data)
109{
110 volatile int *endflag = data;
111 local_irq_enable();
112 /* Intentionally don't use cpu_relax here. This is
113 to make sure that the performance counter really ticks,
114 even if there is a simulator or similar that catches the
115 pause instruction. On a real HT machine this is fine because
116 all other CPUs are busy with "useless" delay loops and don't
117 care if they get somewhat less cycles. */
118 while (*endflag == 0)
119 barrier();
120}
121#endif
122
103static int __init check_nmi_watchdog(void) 123static int __init check_nmi_watchdog(void)
104{ 124{
105 unsigned int prev_nmi_count[NR_CPUS]; 125 volatile int endflag = 0;
126 unsigned int *prev_nmi_count;
106 int cpu; 127 int cpu;
107 128
108 if (nmi_watchdog == NMI_NONE) 129 if (nmi_watchdog == NMI_NONE)
109 return 0; 130 return 0;
110 131
132 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
133 if (!prev_nmi_count)
134 return -1;
135
111 printk(KERN_INFO "Testing NMI watchdog ... "); 136 printk(KERN_INFO "Testing NMI watchdog ... ");
112 137
138 if (nmi_watchdog == NMI_LOCAL_APIC)
139 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
140
113 for (cpu = 0; cpu < NR_CPUS; cpu++) 141 for (cpu = 0; cpu < NR_CPUS; cpu++)
114 prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; 142 prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
115 local_irq_enable(); 143 local_irq_enable();
@@ -123,12 +151,18 @@ static int __init check_nmi_watchdog(void)
123 continue; 151 continue;
124#endif 152#endif
125 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { 153 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
126 printk("CPU#%d: NMI appears to be stuck!\n", cpu); 154 endflag = 1;
155 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
156 cpu,
157 prev_nmi_count[cpu],
158 nmi_count(cpu));
127 nmi_active = 0; 159 nmi_active = 0;
128 lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; 160 lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
161 kfree(prev_nmi_count);
129 return -1; 162 return -1;
130 } 163 }
131 } 164 }
165 endflag = 1;
132 printk("OK.\n"); 166 printk("OK.\n");
133 167
134 /* now that we know it works we can reduce NMI frequency to 168 /* now that we know it works we can reduce NMI frequency to
@@ -136,6 +170,7 @@ static int __init check_nmi_watchdog(void)
136 if (nmi_watchdog == NMI_LOCAL_APIC) 170 if (nmi_watchdog == NMI_LOCAL_APIC)
137 nmi_hz = 1; 171 nmi_hz = 1;
138 172
173 kfree(prev_nmi_count);
139 return 0; 174 return 0;
140} 175}
141/* This needs to happen later in boot so counters are working */ 176/* This needs to happen later in boot so counters are working */