path: root/arch/i386/kernel/nmi.c
author		Jeff Garzik <jeff@garzik.org>	2006-09-27 18:13:53 -0400
committer	Jeff Garzik <jeff@garzik.org>	2006-09-27 18:13:53 -0400
commit		aebb1153ac54ddbbd3d3f0481a193f4bf0ead53b (patch)
tree		57425aa83c8bed5b41af7e3408024fe1f2fdded9 /arch/i386/kernel/nmi.c
parent		022e7a12b6aa11a11de4d708fe8606c9a6734b37 (diff)
parent		a77c64c1a641950626181b4857abb701d8f38ccc (diff)
Merge branch 'master' into upstream
Diffstat (limited to 'arch/i386/kernel/nmi.c')
-rw-r--r--	arch/i386/kernel/nmi.c	940
1 file changed, 651 insertions(+), 289 deletions(-)
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index acb351478e42..dbda706fdd14 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -21,83 +21,174 @@
 #include <linux/sysdev.h>
 #include <linux/sysctl.h>
 #include <linux/percpu.h>
+#include <linux/dmi.h>
+#include <linux/kprobes.h>
 
 #include <asm/smp.h>
 #include <asm/nmi.h>
+#include <asm/kdebug.h>
 #include <asm/intel_arch_perfmon.h>
 
 #include "mach_traps.h"
 
-unsigned int nmi_watchdog = NMI_NONE;
-extern int unknown_nmi_panic;
-static unsigned int nmi_hz = HZ;
-static unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
-static unsigned int nmi_p4_cccr_val;
-extern void show_registers(struct pt_regs *regs);
+/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
+ * evtsel_nmi_owner tracks the ownership of the event selection
+ * - different performance counters/ event selection may be reserved for
+ *   different subsystems this reservation system just tries to coordinate
+ *   things a little
+ */
+static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
+static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
 
-/*
- * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
- * - it may be reserved by some other driver, or not
- * - when not reserved by some other driver, it may be used for
- *   the NMI watchdog, or not
- *
- * This is maintained separately from nmi_active because the NMI
- * watchdog may also be driven from the I/O APIC timer.
+/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
+ * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
  */
-static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
-static unsigned int lapic_nmi_owner;
-#define LAPIC_NMI_WATCHDOG	(1<<0)
-#define LAPIC_NMI_RESERVED	(1<<1)
+#define NMI_MAX_COUNTER_BITS 66
 
 /* nmi_active:
- * +1: the lapic NMI watchdog is active, but can be disabled
- *  0: the lapic NMI watchdog has not been set up, and cannot
+ * >0: the lapic NMI watchdog is active, but can be disabled
+ * <0: the lapic NMI watchdog has not been set up, and cannot
  *     be enabled
- * -1: the lapic NMI watchdog is disabled, but can be enabled
+ *  0: the lapic NMI watchdog is disabled, but can be enabled
  */
-int nmi_active;
+atomic_t nmi_active = ATOMIC_INIT(0);	/* oprofile uses this */
 
-#define K7_EVNTSEL_ENABLE	(1 << 22)
-#define K7_EVNTSEL_INT		(1 << 20)
-#define K7_EVNTSEL_OS		(1 << 17)
-#define K7_EVNTSEL_USR		(1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
-#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+unsigned int nmi_watchdog = NMI_DEFAULT;
+static unsigned int nmi_hz = HZ;
 
-#define P6_EVNTSEL0_ENABLE	(1 << 22)
-#define P6_EVNTSEL_INT		(1 << 20)
-#define P6_EVNTSEL_OS		(1 << 17)
-#define P6_EVNTSEL_USR		(1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
-#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
+struct nmi_watchdog_ctlblk {
+	int enabled;
+	u64 check_bit;
+	unsigned int cccr_msr;
+	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
+	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
+};
+static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
 
-#define MSR_P4_MISC_ENABLE	0x1A0
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
-#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
-#define MSR_P4_PERFCTR0	0x300
-#define MSR_P4_CCCR0		0x360
-#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
-#define P4_ESCR_OS		(1<<3)
-#define P4_ESCR_USR		(1<<2)
-#define P4_CCCR_OVF_PMI0	(1<<26)
-#define P4_CCCR_OVF_PMI1	(1<<27)
-#define P4_CCCR_THRESHOLD(N)	((N)<<20)
-#define P4_CCCR_COMPLEMENT	(1<<19)
-#define P4_CCCR_COMPARE	(1<<18)
-#define P4_CCCR_REQUIRED	(3<<16)
-#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
-#define P4_CCCR_ENABLE	(1<<12)
-/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
-   CRU_ESCR0 (with any non-null event selector) through a complemented
-   max threshold. [IA32-Vol3, Section 14.9.9] */
-#define MSR_P4_IQ_COUNTER0	0x30C
-#define P4_NMI_CRU_ESCR0	(P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
-#define P4_NMI_IQ_CCCR0	\
-	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
-	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+/* local prototypes */
+static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
 
-#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+extern void show_registers(struct pt_regs *regs);
+extern int unknown_nmi_panic;
+
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
+{
+	/* returns the bit offset of the performance counter register */
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return (msr - MSR_K7_PERFCTR0);
+	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+			return (msr - MSR_ARCH_PERFMON_PERFCTR0);
+
+		switch (boot_cpu_data.x86) {
+		case 6:
+			return (msr - MSR_P6_PERFCTR0);
+		case 15:
+			return (msr - MSR_P4_BPU_PERFCTR0);
+		}
+	}
+	return 0;
+}
+
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
+{
+	/* returns the bit offset of the event selection register */
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return (msr - MSR_K7_EVNTSEL0);
+	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
+
+		switch (boot_cpu_data.x86) {
+		case 6:
+			return (msr - MSR_P6_EVNTSEL0);
+		case 15:
+			return (msr - MSR_P4_BSU_ESCR0);
+		}
+	}
+	return 0;
+}
+
+/* checks for a bit availability (hack for oprofile) */
+int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
+{
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+}
+
+/* checks an msr for availability */
+int avail_to_resrv_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+}
+
+int reserve_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
+		return 1;
+	return 0;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_evntsel_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
+		return 1;
+	return 0;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_evntsel_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
+}
+
+static __cpuinit inline int nmi_known_cpu(void)
+{
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+			return 1;
+		else
+			return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+	}
+	return 0;
+}
 
 #ifdef CONFIG_SMP
 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
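The reservation layer introduced in this hunk is deliberately simple: each perfctr/evtsel MSR maps to a bit offset from its block's base register, and ownership is a test-and-set on a per-CPU bitmask. A minimal caller-side sketch, reusing the K7 MSR names that appear elsewhere in this patch (any other subsystem, e.g. oprofile, would do the same dance):

	/* claim a counter and its event-select register, or back off */
	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
		return -EBUSY;			/* someone else owns the counter */
	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
		release_perfctr_nmi(MSR_K7_PERFCTR0);
		return -EBUSY;
	}
	/* ... program the hardware; on teardown release both bits ... */
	release_evntsel_nmi(MSR_K7_EVNTSEL0);
	release_perfctr_nmi(MSR_K7_PERFCTR0);

Note the convention is reversed relative to test_and_set_bit(): these helpers return 1 on success.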
@@ -125,7 +216,18 @@ static int __init check_nmi_watchdog(void)
 	unsigned int *prev_nmi_count;
 	int cpu;
 
-	if (nmi_watchdog == NMI_NONE)
+	/* Enable NMI watchdog for newer systems.
+	   Actually it should be safe for most systems before 2004 too except
+	   for some IBM systems that corrupt registers when NMI happens
+	   during SMM. Unfortunately we don't have more exact information
+	   on these and use this coarse check. */
+	if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004)
+		nmi_watchdog = NMI_LOCAL_APIC;
+
+	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
+		return 0;
+
+	if (!atomic_read(&nmi_active))
 		return 0;
 
 	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
@@ -149,25 +251,45 @@ static int __init check_nmi_watchdog(void)
 		if (!cpu_isset(cpu, cpu_callin_map))
 			continue;
 #endif
+		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
+			continue;
 		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
-			endflag = 1;
 			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
 				cpu,
 				prev_nmi_count[cpu],
 				nmi_count(cpu));
-			nmi_active = 0;
-			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
-			kfree(prev_nmi_count);
-			return -1;
+			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
+			atomic_dec(&nmi_active);
 		}
 	}
+	if (!atomic_read(&nmi_active)) {
+		kfree(prev_nmi_count);
+		atomic_set(&nmi_active, -1);
+		return -1;
+	}
 	endflag = 1;
 	printk("OK.\n");
 
 	/* now that we know it works we can reduce NMI frequency to
 	   something more reasonable; makes a difference in some configs */
-	if (nmi_watchdog == NMI_LOCAL_APIC)
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
 		nmi_hz = 1;
+		/*
+		 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
+		 * are writable, with higher bits sign extending from bit 31.
+		 * So, we can only program the counter with 31 bit values and
+		 * 32nd bit should be 1, for 33.. to be 1.
+		 * Find the appropriate nmi_hz
+		 */
+		if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
+			((u64)cpu_khz * 1000) > 0x7fffffffULL) {
+			u64 count = (u64)cpu_khz * 1000;
+			do_div(count, 0x7fffffffUL);
+			nmi_hz = count + 1;
+		}
+	}
 
 	kfree(prev_nmi_count);
 	return 0;
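The nmi_hz recalculation above exists because an ARCH_PERFMON counter can only be programmed with a sign-extended 31-bit value. Worked through for a hypothetical 3 GHz part (cpu_khz = 3000000):

	u64 count = (u64)3000000 * 1000;	/* 3e9 cycles per second       */
	do_div(count, 0x7fffffffUL);		/* 3e9 / (2^31 - 1) = 1        */
	nmi_hz = count + 1;			/* 2 NMIs/sec; each period is
						   1.5e9 cycles, below 2^31   */

Anything at or below roughly 2.147 GHz keeps the nmi_hz = 1 default, because one second's worth of cycles already fits in 31 bits.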
@@ -181,124 +303,70 @@ static int __init setup_nmi_watchdog(char *str)
 
 	get_option(&str, &nmi);
 
-	if (nmi >= NMI_INVALID)
+	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
 		return 0;
-	if (nmi == NMI_NONE)
-		nmi_watchdog = nmi;
 	/*
 	 * If any other x86 CPU has a local APIC, then
 	 * please test the NMI stuff there and send me the
 	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
 	 */
-	if ((nmi == NMI_LOCAL_APIC) &&
-			(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-			(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
-		nmi_watchdog = nmi;
-	if ((nmi == NMI_LOCAL_APIC) &&
-			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
-			(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
-		nmi_watchdog = nmi;
-	/*
-	 * We can enable the IO-APIC watchdog
-	 * unconditionally.
-	 */
-	if (nmi == NMI_IO_APIC) {
-		nmi_active = 1;
-		nmi_watchdog = nmi;
-	}
+	if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
+		return 0;  /* no lapic support */
+	nmi_watchdog = nmi;
 	return 1;
 }
 
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
-static void disable_intel_arch_watchdog(void);
-
 static void disable_lapic_nmi_watchdog(void)
 {
-	if (nmi_active <= 0)
+	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+	if (atomic_read(&nmi_active) <= 0)
 		return;
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
-		break;
-	case X86_VENDOR_INTEL:
-		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-			disable_intel_arch_watchdog();
-			break;
-		}
-		switch (boot_cpu_data.x86) {
-		case 6:
-			if (boot_cpu_data.x86_model > 0xd)
-				break;
 
-			wrmsr(MSR_P6_EVNTSEL0, 0, 0);
-			break;
-		case 15:
-			if (boot_cpu_data.x86_model > 0x4)
-				break;
+	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
 
-			wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
-			wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
-			break;
-		}
-		break;
-	}
-	nmi_active = -1;
-	/* tell do_nmi() and others that we're not active any more */
-	nmi_watchdog = 0;
+	BUG_ON(atomic_read(&nmi_active) != 0);
 }
 
 static void enable_lapic_nmi_watchdog(void)
 {
-	if (nmi_active < 0) {
-		nmi_watchdog = NMI_LOCAL_APIC;
-		setup_apic_nmi_watchdog();
-	}
-}
+	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
 
-int reserve_lapic_nmi(void)
-{
-	unsigned int old_owner;
+	/* are we already enabled */
+	if (atomic_read(&nmi_active) != 0)
+		return;
 
-	spin_lock(&lapic_nmi_owner_lock);
-	old_owner = lapic_nmi_owner;
-	lapic_nmi_owner |= LAPIC_NMI_RESERVED;
-	spin_unlock(&lapic_nmi_owner_lock);
-	if (old_owner & LAPIC_NMI_RESERVED)
-		return -EBUSY;
-	if (old_owner & LAPIC_NMI_WATCHDOG)
-		disable_lapic_nmi_watchdog();
-	return 0;
-}
-
-void release_lapic_nmi(void)
-{
-	unsigned int new_owner;
+	/* are we lapic aware */
+	if (nmi_known_cpu() <= 0)
+		return;
 
-	spin_lock(&lapic_nmi_owner_lock);
-	new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
-	lapic_nmi_owner = new_owner;
-	spin_unlock(&lapic_nmi_owner_lock);
-	if (new_owner & LAPIC_NMI_WATCHDOG)
-		enable_lapic_nmi_watchdog();
+	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+	touch_nmi_watchdog();
 }
 
 void disable_timer_nmi_watchdog(void)
 {
-	if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
+	BUG_ON(nmi_watchdog != NMI_IO_APIC);
+
+	if (atomic_read(&nmi_active) <= 0)
 		return;
 
-	unset_nmi_callback();
-	nmi_active = -1;
-	nmi_watchdog = NMI_NONE;
+	disable_irq(0);
+	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+
+	BUG_ON(atomic_read(&nmi_active) != 0);
 }
 
 void enable_timer_nmi_watchdog(void)
 {
-	if (nmi_active < 0) {
-		nmi_watchdog = NMI_IO_APIC;
+	BUG_ON(nmi_watchdog != NMI_IO_APIC);
+
+	if (atomic_read(&nmi_active) == 0) {
 		touch_nmi_watchdog();
-		nmi_active = 1;
+		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+		enable_irq(0);
 	}
 }
 
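Every enable/disable path above now funnels through setup_apic_nmi_watchdog()/stop_apic_nmi_watchdog() executed on all CPUs. For reference, the 2.6.18-era on_each_cpu() runs the function locally and IPIs the rest; spelled out with its four arguments:

	on_each_cpu(stop_apic_nmi_watchdog,	/* function to run          */
		    NULL,			/* no argument              */
		    0,				/* retry: don't retry IPIs  */
		    1);				/* wait: block until every
						   CPU has run the function */

The wait=1 is what makes the BUG_ON(atomic_read(&nmi_active) != 0) checks safe: by the time on_each_cpu() returns, every CPU has decremented nmi_active.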
@@ -308,15 +376,20 @@ static int nmi_pm_active; /* nmi_active before suspend */
 
 static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
 {
-	nmi_pm_active = nmi_active;
-	disable_lapic_nmi_watchdog();
+	/* only CPU0 goes here, other CPUs should be offline */
+	nmi_pm_active = atomic_read(&nmi_active);
+	stop_apic_nmi_watchdog(NULL);
+	BUG_ON(atomic_read(&nmi_active) != 0);
 	return 0;
 }
 
 static int lapic_nmi_resume(struct sys_device *dev)
 {
-	if (nmi_pm_active > 0)
-		enable_lapic_nmi_watchdog();
+	/* only CPU0 goes here, other CPUs should be offline */
+	if (nmi_pm_active > 0) {
+		setup_apic_nmi_watchdog(NULL);
+		touch_nmi_watchdog();
+	}
 	return 0;
 }
 
@@ -336,7 +409,13 @@ static int __init init_lapic_nmi_sysfs(void)
 {
 	int error;
 
-	if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
+	/* should really be a BUG_ON but b/c this is an
+	 * init call, it just doesn't work.  -dcz
+	 */
+	if (nmi_watchdog != NMI_LOCAL_APIC)
+		return 0;
+
+	if ( atomic_read(&nmi_active) < 0 )
 		return 0;
 
 	error = sysdev_class_register(&nmi_sysclass);
@@ -354,138 +433,269 @@ late_initcall(init_lapic_nmi_sysfs);
  * Original code written by Keith Owens.
  */
 
-static void clear_msr_range(unsigned int base, unsigned int n)
-{
-	unsigned int i;
-
-	for(i = 0; i < n; ++i)
-		wrmsr(base+i, 0, 0);
-}
-
-static void write_watchdog_counter(const char *descr)
+static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
 {
 	u64 count = (u64)cpu_khz * 1000;
 
 	do_div(count, nmi_hz);
 	if(descr)
 		Dprintk("setting %s to -0x%08Lx\n", descr, count);
-	wrmsrl(nmi_perfctr_msr, 0 - count);
+	wrmsrl(perfctr_msr, 0 - count);
 }
 
-static void setup_k7_watchdog(void)
+/* Note that these events don't tick when the CPU idles. This means
+   the frequency varies with CPU load. */
+
+#define K7_EVNTSEL_ENABLE	(1 << 22)
+#define K7_EVNTSEL_INT		(1 << 20)
+#define K7_EVNTSEL_OS		(1 << 17)
+#define K7_EVNTSEL_USR		(1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
+#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
+static int setup_k7_watchdog(void)
 {
+	unsigned int perfctr_msr, evntsel_msr;
 	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	perfctr_msr = MSR_K7_PERFCTR0;
+	evntsel_msr = MSR_K7_EVNTSEL0;
+	if (!reserve_perfctr_nmi(perfctr_msr))
+		goto fail;
 
-	nmi_perfctr_msr = MSR_K7_PERFCTR0;
+	if (!reserve_evntsel_nmi(evntsel_msr))
+		goto fail1;
 
-	clear_msr_range(MSR_K7_EVNTSEL0, 4);
-	clear_msr_range(MSR_K7_PERFCTR0, 4);
+	wrmsrl(perfctr_msr, 0UL);
 
 	evntsel = K7_EVNTSEL_INT
 		| K7_EVNTSEL_OS
 		| K7_EVNTSEL_USR
 		| K7_NMI_EVENT;
 
-	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
-	write_watchdog_counter("K7_PERFCTR0");
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= K7_EVNTSEL_ENABLE;
-	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd->check_bit = 1ULL<<63;
+	return 1;
+fail1:
+	release_perfctr_nmi(perfctr_msr);
+fail:
+	return 0;
+}
+
+static void stop_k7_watchdog(void)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
 }
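write_watchdog_counter() arms the counter with a negated tick period, so the overflow NMI arrives after cycles-per-second / nmi_hz cycles. A worked example, assuming cpu_khz = 2000000 (a 2 GHz part) and the boot-time nmi_hz = HZ = 1000:

	u64 count = (u64)2000000 * 1000;	/* 2e9 cycles per second   */
	do_div(count, 1000);			/* 2e6 cycles per tick     */
	wrmsrl(perfctr_msr, 0 - count);		/* counts up from -2e6 and
						   overflows ~1 ms later   */

check_nmi_watchdog() later drops nmi_hz to 1, stretching the period to a full second.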
 
-static void setup_p6_watchdog(void)
+#define P6_EVNTSEL0_ENABLE	(1 << 22)
+#define P6_EVNTSEL_INT		(1 << 20)
+#define P6_EVNTSEL_OS		(1 << 17)
+#define P6_EVNTSEL_USR		(1 << 16)
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
+#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
+
+static int setup_p6_watchdog(void)
 {
+	unsigned int perfctr_msr, evntsel_msr;
 	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	perfctr_msr = MSR_P6_PERFCTR0;
+	evntsel_msr = MSR_P6_EVNTSEL0;
+	if (!reserve_perfctr_nmi(perfctr_msr))
+		goto fail;
 
-	nmi_perfctr_msr = MSR_P6_PERFCTR0;
+	if (!reserve_evntsel_nmi(evntsel_msr))
+		goto fail1;
 
-	clear_msr_range(MSR_P6_EVNTSEL0, 2);
-	clear_msr_range(MSR_P6_PERFCTR0, 2);
+	wrmsrl(perfctr_msr, 0UL);
 
 	evntsel = P6_EVNTSEL_INT
 		| P6_EVNTSEL_OS
 		| P6_EVNTSEL_USR
 		| P6_NMI_EVENT;
 
-	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
-	write_watchdog_counter("P6_PERFCTR0");
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= P6_EVNTSEL0_ENABLE;
-	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd->check_bit = 1ULL<<39;
+	return 1;
+fail1:
+	release_perfctr_nmi(perfctr_msr);
+fail:
+	return 0;
+}
+
+static void stop_p6_watchdog(void)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
 }
 
+/* Note that these events don't tick when the CPU idles. This means
+   the frequency varies with CPU load. */
+
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
+#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
+#define P4_ESCR_OS		(1<<3)
+#define P4_ESCR_USR		(1<<2)
+#define P4_CCCR_OVF_PMI0	(1<<26)
+#define P4_CCCR_OVF_PMI1	(1<<27)
+#define P4_CCCR_THRESHOLD(N)	((N)<<20)
+#define P4_CCCR_COMPLEMENT	(1<<19)
+#define P4_CCCR_COMPARE		(1<<18)
+#define P4_CCCR_REQUIRED	(3<<16)
+#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
+#define P4_CCCR_ENABLE		(1<<12)
+#define P4_CCCR_OVF		(1<<31)
+/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+   CRU_ESCR0 (with any non-null event selector) through a complemented
+   max threshold. [IA32-Vol3, Section 14.9.9] */
+
 static int setup_p4_watchdog(void)
 {
+	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
+	unsigned int evntsel, cccr_val;
 	unsigned int misc_enable, dummy;
+	unsigned int ht_num;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
-	rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
+	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
 	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
 		return 0;
 
-	nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
-	nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
 #ifdef CONFIG_SMP
-	if (smp_num_siblings == 2)
-		nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
+	/* detect which hyperthread we are on */
+	if (smp_num_siblings == 2) {
+		unsigned int ebx, apicid;
+
+		ebx = cpuid_ebx(1);
+		apicid = (ebx >> 24) & 0xff;
+		ht_num = apicid & 1;
+	} else
 #endif
+		ht_num = 0;
 
-	if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
-		clear_msr_range(0x3F1, 2);
-	/* MSR 0x3F0 seems to have a default value of 0xFC00, but current
-	   docs doesn't fully define it, so leave it alone for now. */
-	if (boot_cpu_data.x86_model >= 0x3) {
-		/* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
-		clear_msr_range(0x3A0, 26);
-		clear_msr_range(0x3BC, 3);
+	/* performance counters are shared resources
+	 * assign each hyperthread its own set
+	 * (re-use the ESCR0 register, seems safe
+	 * and keeps the cccr_val the same)
+	 */
+	if (!ht_num) {
+		/* logical cpu 0 */
+		perfctr_msr = MSR_P4_IQ_PERFCTR0;
+		evntsel_msr = MSR_P4_CRU_ESCR0;
+		cccr_msr = MSR_P4_IQ_CCCR0;
+		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
 	} else {
-		clear_msr_range(0x3A0, 31);
+		/* logical cpu 1 */
+		perfctr_msr = MSR_P4_IQ_PERFCTR1;
+		evntsel_msr = MSR_P4_CRU_ESCR0;
+		cccr_msr = MSR_P4_IQ_CCCR1;
+		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
 	}
-	clear_msr_range(0x3C0, 6);
-	clear_msr_range(0x3C8, 6);
-	clear_msr_range(0x3E0, 2);
-	clear_msr_range(MSR_P4_CCCR0, 18);
-	clear_msr_range(MSR_P4_PERFCTR0, 18);
-
-	wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
-	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
-	write_watchdog_counter("P4_IQ_COUNTER0");
+
+	if (!reserve_perfctr_nmi(perfctr_msr))
+		goto fail;
+
+	if (!reserve_evntsel_nmi(evntsel_msr))
+		goto fail1;
+
+	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
+		| P4_ESCR_OS
+		| P4_ESCR_USR;
+
+	cccr_val |= P4_CCCR_THRESHOLD(15)
+		 | P4_CCCR_COMPLEMENT
+		 | P4_CCCR_COMPARE
+		 | P4_CCCR_REQUIRED;
+
+	wrmsr(evntsel_msr, evntsel, 0);
+	wrmsr(cccr_msr, cccr_val, 0);
+	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
+	cccr_val |= P4_CCCR_ENABLE;
+	wrmsr(cccr_msr, cccr_val, 0);
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = cccr_msr;
+	wd->check_bit = 1ULL<<39;
 	return 1;
+fail1:
+	release_perfctr_nmi(perfctr_msr);
+fail:
+	return 0;
 }
 
-static void disable_intel_arch_watchdog(void)
+static void stop_p4_watchdog(void)
 {
-	unsigned ebx;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
-	/*
-	 * Check whether the Architectural PerfMon supports
-	 * Unhalted Core Cycles Event or not.
-	 * NOTE: Corresponding bit = 0 in ebp indicates event present.
-	 */
-	ebx = cpuid_ebx(10);
-	if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
-		wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+	wrmsr(wd->cccr_msr, 0, 0);
+	wrmsr(wd->evntsel_msr, 0, 0);
+
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
 }
 
+#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
 static int setup_intel_arch_watchdog(void)
 {
+	unsigned int ebx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	unsigned int perfctr_msr, evntsel_msr;
 	unsigned int evntsel;
-	unsigned ebx;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
 	/*
 	 * Check whether the Architectural PerfMon supports
 	 * Unhalted Core Cycles Event or not.
-	 * NOTE: Corresponding bit = 0 in ebp indicates event present.
+	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
 	 */
-	ebx = cpuid_ebx(10);
-	if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
-		return 0;
+	cpuid(10, &(eax.full), &ebx, &unused, &unused);
+	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+		goto fail;
+
+	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
 
-	nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+	if (!reserve_perfctr_nmi(perfctr_msr))
+		goto fail;
 
-	clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
-	clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+	if (!reserve_evntsel_nmi(evntsel_msr))
+		goto fail1;
+
+	wrmsrl(perfctr_msr, 0UL);
 
 	evntsel = ARCH_PERFMON_EVENTSEL_INT
 		| ARCH_PERFMON_EVENTSEL_OS
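The hyperthread detection in setup_p4_watchdog() above leans on the initial APIC ID: HT siblings on a P4 get consecutive IDs, so bit 0 says which logical CPU is running. The extraction, spelled out:

	unsigned int ebx, apicid, ht_num;

	ebx = cpuid_ebx(1);		/* CPUID leaf 1: EBX[31:24] = initial APIC ID */
	apicid = (ebx >> 24) & 0xff;
	ht_num = apicid & 1;		/* sibling 0 or sibling 1 */

Each sibling then gets its own IQ_PERFCTR/IQ_CCCR pair while sharing CRU_ESCR0, which is why cccr_val only differs in which OVF_PMI bit it raises.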
@@ -493,51 +703,145 @@ static int setup_intel_arch_watchdog(void)
 		| ARCH_PERFMON_NMI_EVENT_SEL
 		| ARCH_PERFMON_NMI_EVENT_UMASK;
 
-	wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
-	write_watchdog_counter("INTEL_ARCH_PERFCTR0");
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-	wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
 	return 1;
+fail1:
+	release_perfctr_nmi(perfctr_msr);
+fail:
+	return 0;
 }
 
-void setup_apic_nmi_watchdog (void)
+static void stop_intel_arch_watchdog(void)
 {
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
-			return;
-		setup_k7_watchdog();
-		break;
-	case X86_VENDOR_INTEL:
-		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-			if (!setup_intel_arch_watchdog())
+	unsigned int ebx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Unhalted Core Cycles Event or not.
+	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
+	 */
+	cpuid(10, &(eax.full), &ebx, &unused, &unused);
+	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+		return;
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
+}
+
+void setup_apic_nmi_watchdog (void *unused)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	/* only support LOCAL and IO APICs for now */
+	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
+	    (nmi_watchdog != NMI_IO_APIC))
+		return;
+
+	if (wd->enabled == 1)
+		return;
+
+	/* cheap hack to support suspend/resume */
+	/* if cpu0 is not active neither should the other cpus */
+	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
+		return;
+
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_AMD:
+			if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
 				return;
-			break;
-		}
-		switch (boot_cpu_data.x86) {
-		case 6:
-			if (boot_cpu_data.x86_model > 0xd)
+			if (!setup_k7_watchdog())
 				return;
-
-			setup_p6_watchdog();
 			break;
-		case 15:
-			if (boot_cpu_data.x86_model > 0x4)
-				return;
+		case X86_VENDOR_INTEL:
+			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+				if (!setup_intel_arch_watchdog())
+					return;
+				break;
+			}
+			switch (boot_cpu_data.x86) {
+			case 6:
+				if (boot_cpu_data.x86_model > 0xd)
+					return;
+
+				if (!setup_p6_watchdog())
+					return;
+				break;
+			case 15:
+				if (boot_cpu_data.x86_model > 0x4)
+					return;
 
-			if (!setup_p4_watchdog())
+				if (!setup_p4_watchdog())
+					return;
+				break;
+			default:
 				return;
+			}
 			break;
 		default:
 			return;
 		}
-		break;
-	default:
-		return;
+	}
+	wd->enabled = 1;
+	atomic_inc(&nmi_active);
+}
+
+void stop_apic_nmi_watchdog(void *unused)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	/* only support LOCAL and IO APICs for now */
+	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
+	    (nmi_watchdog != NMI_IO_APIC))
+		return;
+
+	if (wd->enabled == 0)
+		return;
+
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_AMD:
+			stop_k7_watchdog();
+			break;
+		case X86_VENDOR_INTEL:
+			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+				stop_intel_arch_watchdog();
+				break;
+			}
+			switch (boot_cpu_data.x86) {
+			case 6:
+				if (boot_cpu_data.x86_model > 0xd)
+					break;
+				stop_p6_watchdog();
+				break;
+			case 15:
+				if (boot_cpu_data.x86_model > 0x4)
+					break;
+				stop_p4_watchdog();
+				break;
+			}
+			break;
+		default:
+			return;
+		}
 	}
-	lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
-	nmi_active = 1;
+	wd->enabled = 0;
+	atomic_dec(&nmi_active);
 }
 
 /*
@@ -579,7 +883,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
 
 extern void die_nmi(struct pt_regs *, const char *msg);
 
-void nmi_watchdog_tick (struct pt_regs * regs)
+__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 {
 
 	/*
@@ -588,11 +892,23 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 	 * smp_processor_id().
 	 */
 	unsigned int sum;
+	int touched = 0;
 	int cpu = smp_processor_id();
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+	u64 dummy;
+	int rc=0;
+
+	/* check for other users first */
+	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+			== NOTIFY_STOP) {
+		rc = 1;
+		touched = 1;
+	}
 
 	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
 
-	if (last_irq_sums[cpu] == sum) {
+	/* if the apic timer isn't firing, this cpu isn't doing much */
+	if (!touched && last_irq_sums[cpu] == sum) {
 		/*
 		 * Ayiee, looks like this CPU is stuck ...
 		 * wait a few IRQs (5 seconds) before doing the oops ...
@@ -607,27 +923,59 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 		last_irq_sums[cpu] = sum;
 		alert_counter[cpu] = 0;
 	}
-	if (nmi_perfctr_msr) {
-		if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
-			/*
-			 * P4 quirks:
-			 * - An overflown perfctr will assert its interrupt
-			 *   until the OVF flag in its CCCR is cleared.
-			 * - LVTPC is masked on interrupt and must be
-			 *   unmasked by the LVTPC handler.
+	/* see if the nmi watchdog went off */
+	if (wd->enabled) {
+		if (nmi_watchdog == NMI_LOCAL_APIC) {
+			rdmsrl(wd->perfctr_msr, dummy);
+			if (dummy & wd->check_bit){
+				/* this wasn't a watchdog timer interrupt */
+				goto done;
+			}
+
+			/* only Intel P4 uses the cccr msr */
+			if (wd->cccr_msr != 0) {
+				/*
+				 * P4 quirks:
+				 * - An overflown perfctr will assert its interrupt
+				 *   until the OVF flag in its CCCR is cleared.
+				 * - LVTPC is masked on interrupt and must be
+				 *   unmasked by the LVTPC handler.
+				 */
+				rdmsrl(wd->cccr_msr, dummy);
+				dummy &= ~P4_CCCR_OVF;
+				wrmsrl(wd->cccr_msr, dummy);
+				apic_write(APIC_LVTPC, APIC_DM_NMI);
+			}
+			else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
+				 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+				/* P6 based Pentium M need to re-unmask
+				 * the apic vector but it doesn't hurt
+				 * other P6 variant.
+				 * ArchPerfom/Core Duo also needs this */
+				apic_write(APIC_LVTPC, APIC_DM_NMI);
+			}
+			/* start the cycle over again */
+			write_watchdog_counter(wd->perfctr_msr, NULL);
+			rc = 1;
+		} else if (nmi_watchdog == NMI_IO_APIC) {
+			/* don't know how to accurately check for this.
+			 * just assume it was a watchdog timer interrupt
+			 * This matches the old behaviour.
 			 */
-			wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
-			apic_write(APIC_LVTPC, APIC_DM_NMI);
+			rc = 1;
 		}
-		else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
-			 nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
-			/* Only P6 based Pentium M need to re-unmask
-			 * the apic vector but it doesn't hurt
-			 * other P6 variant */
-			apic_write(APIC_LVTPC, APIC_DM_NMI);
-		}
-		write_watchdog_counter(NULL);
 	}
+done:
+	return rc;
+}
+
+int do_nmi_callback(struct pt_regs * regs, int cpu)
+{
+#ifdef CONFIG_SYSCTL
+	if (unknown_nmi_panic)
+		return unknown_nmi_panic_callback(regs, cpu);
+#endif
+	return 0;
 }
 
 #ifdef CONFIG_SYSCTL
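The check_bit test above reads backwards at first glance: the counter was loaded with a negative value and counts up, so its top implemented bit (bit 39 on P4/P6, bit 63 on K7, bit_width-1 for ARCH_PERFMON) stays set until the counter wraps. A set bit therefore means "no overflow yet, this NMI belongs to someone else". The new return code is consumed by the NMI entry path; a sketch of the dispatch on the default_do_nmi() side (that code lives in traps.c, outside this diff, so treat the exact shape as an assumption):

	if (nmi_watchdog_tick(regs, reason))
		return;				/* watchdog consumed the NMI */
	if (!do_nmi_callback(regs, smp_processor_id()))
		unknown_nmi_error(reason, regs);	/* nobody claimed it */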
@@ -637,36 +985,46 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
 	unsigned char reason = get_nmi_reason();
 	char buf[64];
 
-	if (!(reason & 0xc0)) {
-		sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-		die_nmi(regs, buf);
-	}
+	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
+	die_nmi(regs, buf);
 	return 0;
 }
 
 /*
- * proc handler for /proc/sys/kernel/unknown_nmi_panic
+ * proc handler for /proc/sys/kernel/nmi
  */
-int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
+int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 			void __user *buffer, size_t *length, loff_t *ppos)
 {
 	int old_state;
 
-	old_state = unknown_nmi_panic;
+	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
+	old_state = nmi_watchdog_enabled;
 	proc_dointvec(table, write, file, buffer, length, ppos);
-	if (!!old_state == !!unknown_nmi_panic)
+	if (!!old_state == !!nmi_watchdog_enabled)
 		return 0;
 
-	if (unknown_nmi_panic) {
-		if (reserve_lapic_nmi() < 0) {
-			unknown_nmi_panic = 0;
-			return -EBUSY;
-		} else {
-			set_nmi_callback(unknown_nmi_panic_callback);
-		}
+	if (atomic_read(&nmi_active) < 0) {
+		printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
+		return -EIO;
+	}
+
+	if (nmi_watchdog == NMI_DEFAULT) {
+		if (nmi_known_cpu() > 0)
+			nmi_watchdog = NMI_LOCAL_APIC;
+		else
+			nmi_watchdog = NMI_IO_APIC;
+	}
+
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		if (nmi_watchdog_enabled)
+			enable_lapic_nmi_watchdog();
+		else
+			disable_lapic_nmi_watchdog();
 	} else {
-		release_lapic_nmi();
-		unset_nmi_callback();
+		printk( KERN_WARNING
+			"NMI watchdog doesn't know what hardware to touch\n");
+		return -EIO;
 	}
 	return 0;
 }
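proc_nmi_enabled() is meant to sit behind /proc/sys/kernel/nmi_watchdog via a ctl_table entry; the kernel/sysctl.c side is not part of this diff, so the sketch below (entry name and table shape) is an assumption based on the handler's use of nmi_watchdog_enabled:

	static ctl_table nmi_table[] = {
		{
			.procname	= "nmi_watchdog",
			.data		= &nmi_watchdog_enabled,
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_nmi_enabled,
		},
		{ .ctl_name = 0 }
	};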
@@ -675,7 +1033,11 @@ int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
 
 EXPORT_SYMBOL(nmi_active);
 EXPORT_SYMBOL(nmi_watchdog);
-EXPORT_SYMBOL(reserve_lapic_nmi);
-EXPORT_SYMBOL(release_lapic_nmi);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
+EXPORT_SYMBOL(reserve_perfctr_nmi);
+EXPORT_SYMBOL(release_perfctr_nmi);
+EXPORT_SYMBOL(reserve_evntsel_nmi);
+EXPORT_SYMBOL(release_evntsel_nmi);
 EXPORT_SYMBOL(disable_timer_nmi_watchdog);
 EXPORT_SYMBOL(enable_timer_nmi_watchdog);