author    Don Zickus <dzickus@redhat.com>    2006-09-26 04:52:26 -0400
committer Andi Kleen <andi@basil.nowhere.org>    2006-09-26 04:52:26 -0400
commit    b7471c6da94d30d3deadc55986cc38d1ff57f9ca
tree      6aa23314273763acccbe9ddd0b8bd442edde0509
parent    f2802e7f571c05f9a901b1f5bd144aa730ccc88e
[PATCH] i386: Add SMP support on i386 to reservation framework
This patch includes the changes to make the nmi watchdog on i386 SMP aware.
A bunch of code was moved around to make it simpler to read. In addition,
it is now possible to determine whether a particular NMI was the result of
the watchdog or not. This feature allows the kernel to filter out unknown
NMIs more easily.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
 arch/i386/kernel/apic.c            |   3
 arch/i386/kernel/nmi.c             | 537
 arch/i386/kernel/traps.c           |   2
 arch/i386/oprofile/nmi_timer_int.c |   4
 include/asm-i386/nmi.h             |   5
 5 files changed, 360 insertions(+), 191 deletions(-)
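The watchdog-or-not detection mentioned in the log works by programming each
watchdog perfctr to a negative value and letting it count up: the NMI fires on
overflow, so the counter's sign bit tells the handler whether the watchdog was
the source. A minimal sketch of the test, using the per-CPU control block the
patch introduces below (check_bit is 1ULL<<63 on K7, 1ULL<<39 on P6/P4):

	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 ctr;

	rdmsrl(wd->perfctr_msr, ctr);
	if (ctr & wd->check_bit)
		/* sign bit still set: no overflow, so this NMI
		 * did not come from the watchdog */
		goto done;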
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 8c844d07862f..1a34fc57800b 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -586,8 +586,7 @@ void __devinit setup_local_APIC(void)
 		printk("No ESR for 82489DX.\n");
 	}
 
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		setup_apic_nmi_watchdog();
+	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
 }
 
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 5d58dfeacd59..d88004343034 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -24,16 +24,10 @@
 
 #include <asm/smp.h>
 #include <asm/nmi.h>
+#include <asm/kdebug.h>
 
 #include "mach_traps.h"
 
-unsigned int nmi_watchdog = NMI_NONE;
-extern int unknown_nmi_panic;
-static unsigned int nmi_hz = HZ;
-static unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
-static unsigned int nmi_p4_cccr_val;
-extern void show_registers(struct pt_regs *regs);
-
 /* perfctr_nmi_owner tracks the ownership of the perfctr registers:
  * evtsel_nmi_owner tracks the ownership of the event selection
  * - different performance counters/ event selection may be reserved for
@@ -63,51 +57,31 @@ static unsigned int lapic_nmi_owner;
 #define LAPIC_NMI_RESERVED	(1<<1)
 
 /* nmi_active:
- * +1: the lapic NMI watchdog is active, but can be disabled
- * 0: the lapic NMI watchdog has not been set up, and cannot
+ * >0: the lapic NMI watchdog is active, but can be disabled
+ * <0: the lapic NMI watchdog has not been set up, and cannot
  *    be enabled
- * -1: the lapic NMI watchdog is disabled, but can be enabled
+ * 0: the lapic NMI watchdog is disabled, but can be enabled
  */
-int nmi_active;
+atomic_t nmi_active = ATOMIC_INIT(0);	/* oprofile uses this */
 
-#define K7_EVNTSEL_ENABLE	(1 << 22)
-#define K7_EVNTSEL_INT		(1 << 20)
-#define K7_EVNTSEL_OS		(1 << 17)
-#define K7_EVNTSEL_USR		(1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
-#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+unsigned int nmi_watchdog = NMI_DEFAULT;
+static unsigned int nmi_hz = HZ;
 
-#define P6_EVNTSEL0_ENABLE	(1 << 22)
-#define P6_EVNTSEL_INT		(1 << 20)
-#define P6_EVNTSEL_OS		(1 << 17)
-#define P6_EVNTSEL_USR		(1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
-#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
+struct nmi_watchdog_ctlblk {
+	int enabled;
+	u64 check_bit;
+	unsigned int cccr_msr;
+	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
+	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
+};
+static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
 
-#define MSR_P4_MISC_ENABLE	0x1A0
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
-#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
-#define MSR_P4_PERFCTR0		0x300
-#define MSR_P4_CCCR0		0x360
-#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
-#define P4_ESCR_OS		(1<<3)
-#define P4_ESCR_USR		(1<<2)
-#define P4_CCCR_OVF_PMI0	(1<<26)
-#define P4_CCCR_OVF_PMI1	(1<<27)
-#define P4_CCCR_THRESHOLD(N)	((N)<<20)
-#define P4_CCCR_COMPLEMENT	(1<<19)
-#define P4_CCCR_COMPARE		(1<<18)
-#define P4_CCCR_REQUIRED	(3<<16)
-#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
-#define P4_CCCR_ENABLE		(1<<12)
-/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
-   CRU_ESCR0 (with any non-null event selector) through a complemented
-   max threshold. [IA32-Vol3, Section 14.9.9] */
-#define MSR_P4_IQ_COUNTER0	0x30C
-#define P4_NMI_CRU_ESCR0	(P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
-#define P4_NMI_IQ_CCCR0	\
-	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
-	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+/* local prototypes */
+static void stop_apic_nmi_watchdog(void *unused);
+static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
+
+extern void show_registers(struct pt_regs *regs);
+extern int unknown_nmi_panic;
 
 /* converts an msr to an appropriate reservation bit */
 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
@@ -208,6 +182,17 @@ void release_evntsel_nmi(unsigned int msr)
 	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
 }
 
+static __cpuinit inline int nmi_known_cpu(void)
+{
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+	case X86_VENDOR_INTEL:
+		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+	}
+	return 0;
+}
+
 #ifdef CONFIG_SMP
 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
  * the CPU is idle. To make sure the NMI watchdog really ticks on all
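nmi_known_cpu() folds the previously duplicated vendor/family tests (AMD and
Intel, family 6 or 15) into one predicate; later hunks gate every
LAPIC-watchdog path on it, e.g.:

	if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
		return 0;	/* no lapic support */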
@@ -234,7 +219,10 @@ static int __init check_nmi_watchdog(void)
 	unsigned int *prev_nmi_count;
 	int cpu;
 
-	if (nmi_watchdog == NMI_NONE)
+	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
+		return 0;
+
+	if (!atomic_read(&nmi_active))
 		return 0;
 
 	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
@@ -258,18 +246,22 @@ static int __init check_nmi_watchdog(void)
 		if (!cpu_isset(cpu, cpu_callin_map))
 			continue;
 #endif
+		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
+			continue;
 		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
-			endflag = 1;
 			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
 				cpu,
 				prev_nmi_count[cpu],
 				nmi_count(cpu));
-			nmi_active = 0;
-			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
-			kfree(prev_nmi_count);
-			return -1;
+			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
+			atomic_dec(&nmi_active);
 		}
 	}
+	if (!atomic_read(&nmi_active)) {
+		kfree(prev_nmi_count);
+		atomic_set(&nmi_active, -1);
+		return -1;
+	}
 	endflag = 1;
 	printk("OK.\n");
 
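check_nmi_watchdog() now degrades per CPU instead of failing wholesale: a
stuck counter disables only that CPU's watchdog, and the function gives up
only when no CPU is left ticking. Roughly (a simplified sketch of the
function around this hunk; the real code snapshots the counts before this
loop and delays about ten watchdog periods):

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		prev_nmi_count[cpu] = nmi_count(cpu);
	mdelay((10 * 1000) / nmi_hz);	/* wait ~10 ticks */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
			atomic_dec(&nmi_active);
		}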
@@ -290,31 +282,16 @@ static int __init setup_nmi_watchdog(char *str)
 
 	get_option(&str, &nmi);
 
-	if (nmi >= NMI_INVALID)
+	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
 		return 0;
-	if (nmi == NMI_NONE)
-		nmi_watchdog = nmi;
 	/*
 	 * If any other x86 CPU has a local APIC, then
 	 * please test the NMI stuff there and send me the
 	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
 	 */
-	if ((nmi == NMI_LOCAL_APIC) &&
-			(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-			(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
-		nmi_watchdog = nmi;
-	if ((nmi == NMI_LOCAL_APIC) &&
-			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
-			(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
-		nmi_watchdog = nmi;
-	/*
-	 * We can enable the IO-APIC watchdog
-	 * unconditionally.
-	 */
-	if (nmi == NMI_IO_APIC) {
-		nmi_active = 1;
-		nmi_watchdog = nmi;
-	}
+	if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
+		return 0;	/* no lapic support */
+	nmi_watchdog = nmi;
 	return 1;
 }
 
@@ -322,41 +299,30 @@ __setup("nmi_watchdog=", setup_nmi_watchdog);
 
 static void disable_lapic_nmi_watchdog(void)
 {
-	if (nmi_active <= 0)
+	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+	if (atomic_read(&nmi_active) <= 0)
 		return;
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
-		break;
-	case X86_VENDOR_INTEL:
-		switch (boot_cpu_data.x86) {
-		case 6:
-			if (boot_cpu_data.x86_model > 0xd)
-				break;
 
-			wrmsr(MSR_P6_EVNTSEL0, 0, 0);
-			break;
-		case 15:
-			if (boot_cpu_data.x86_model > 0x4)
-				break;
+	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
 
-			wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
-			wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
-			break;
-		}
-		break;
-	}
-	nmi_active = -1;
-	/* tell do_nmi() and others that we're not active any more */
-	nmi_watchdog = 0;
+	BUG_ON(atomic_read(&nmi_active) != 0);
 }
 
 static void enable_lapic_nmi_watchdog(void)
 {
-	if (nmi_active < 0) {
-		nmi_watchdog = NMI_LOCAL_APIC;
-		setup_apic_nmi_watchdog();
-	}
+	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+	/* are we already enabled */
+	if (atomic_read(&nmi_active) != 0)
+		return;
+
+	/* are we lapic aware */
+	if (nmi_known_cpu() <= 0)
+		return;
+
+	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+	touch_nmi_watchdog();
 }
 
 int reserve_lapic_nmi(void)
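nmi_active becomes an atomic_t because enabling and disabling are now
cross-calls: each CPU runs the setup or stop routine against its own MSRs
and increments or decrements the count concurrently. The four-argument
on_each_cpu(func, info, nonatomic, wait) is the signature of this kernel
era; passing wait=1 is what makes the following assertion sound:

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
	/* every CPU has decremented nmi_active by now */
	BUG_ON(atomic_read(&nmi_active) != 0);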
@@ -388,20 +354,25 @@ void release_lapic_nmi(void)
 
 void disable_timer_nmi_watchdog(void)
 {
-	if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
+	BUG_ON(nmi_watchdog != NMI_IO_APIC);
+
+	if (atomic_read(&nmi_active) <= 0)
 		return;
 
-	unset_nmi_callback();
-	nmi_active = -1;
-	nmi_watchdog = NMI_NONE;
+	disable_irq(0);
+	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+
+	BUG_ON(atomic_read(&nmi_active) != 0);
 }
 
 void enable_timer_nmi_watchdog(void)
 {
-	if (nmi_active < 0) {
-		nmi_watchdog = NMI_IO_APIC;
+	BUG_ON(nmi_watchdog != NMI_IO_APIC);
+
+	if (atomic_read(&nmi_active) == 0) {
 		touch_nmi_watchdog();
-		nmi_active = 1;
+		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+		enable_irq(0);
 	}
 }
 
@@ -411,7 +382,7 @@ static int nmi_pm_active; /* nmi_active before suspend */
 
 static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
 {
-	nmi_pm_active = nmi_active;
+	nmi_pm_active = atomic_read(&nmi_active);
 	disable_lapic_nmi_watchdog();
 	return 0;
 }
@@ -439,7 +410,13 @@ static int __init init_lapic_nmi_sysfs(void)
 {
 	int error;
 
-	if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
+	/* should really be a BUG_ON but b/c this is an
+	 * init call, it just doesn't work.  -dcz
+	 */
+	if (nmi_watchdog != NMI_LOCAL_APIC)
+		return 0;
+
+	if ( atomic_read(&nmi_active) < 0 )
 		return 0;
 
 	error = sysdev_class_register(&nmi_sysclass);
@@ -457,143 +434,312 @@ late_initcall(init_lapic_nmi_sysfs);
  * Original code written by Keith Owens.
  */
 
-static void write_watchdog_counter(const char *descr)
+static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
 {
 	u64 count = (u64)cpu_khz * 1000;
 
 	do_div(count, nmi_hz);
 	if(descr)
 		Dprintk("setting %s to -0x%08Lx\n", descr, count);
-	wrmsrl(nmi_perfctr_msr, 0 - count);
+	wrmsrl(perfctr_msr, 0 - count);
 }
 
+/* Note that these events don't tick when the CPU idles. This means
+   the frequency varies with CPU load. */
+
+#define K7_EVNTSEL_ENABLE	(1 << 22)
+#define K7_EVNTSEL_INT		(1 << 20)
+#define K7_EVNTSEL_OS		(1 << 17)
+#define K7_EVNTSEL_USR		(1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
+#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
 static int setup_k7_watchdog(void)
 {
+	unsigned int perfctr_msr, evntsel_msr;
 	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
-	nmi_perfctr_msr = MSR_K7_PERFCTR0;
-
-	if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+	perfctr_msr = MSR_K7_PERFCTR0;
+	evntsel_msr = MSR_K7_EVNTSEL0;
+	if (!reserve_perfctr_nmi(perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0))
+	if (!reserve_evntsel_nmi(evntsel_msr))
 		goto fail1;
 
-	wrmsrl(MSR_K7_PERFCTR0, 0UL);
+	wrmsrl(perfctr_msr, 0UL);
 
 	evntsel = K7_EVNTSEL_INT
 		| K7_EVNTSEL_OS
 		| K7_EVNTSEL_USR
 		| K7_NMI_EVENT;
 
-	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
-	write_watchdog_counter("K7_PERFCTR0");
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= K7_EVNTSEL_ENABLE;
-	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd->check_bit = 1ULL<<63;
 	return 1;
 fail1:
-	release_perfctr_nmi(nmi_perfctr_msr);
+	release_perfctr_nmi(perfctr_msr);
 fail:
 	return 0;
 }
 
+static void stop_k7_watchdog(void)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
+}
+
+#define P6_EVNTSEL0_ENABLE	(1 << 22)
+#define P6_EVNTSEL_INT		(1 << 20)
+#define P6_EVNTSEL_OS		(1 << 17)
+#define P6_EVNTSEL_USR		(1 << 16)
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
+#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
+
 static int setup_p6_watchdog(void)
 {
+	unsigned int perfctr_msr, evntsel_msr;
 	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
-	nmi_perfctr_msr = MSR_P6_PERFCTR0;
-
-	if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+	perfctr_msr = MSR_P6_PERFCTR0;
+	evntsel_msr = MSR_P6_EVNTSEL0;
+	if (!reserve_perfctr_nmi(perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0))
+	if (!reserve_evntsel_nmi(evntsel_msr))
 		goto fail1;
 
+	wrmsrl(perfctr_msr, 0UL);
+
 	evntsel = P6_EVNTSEL_INT
 		| P6_EVNTSEL_OS
 		| P6_EVNTSEL_USR
 		| P6_NMI_EVENT;
 
-	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
-	write_watchdog_counter("P6_PERFCTR0");
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= P6_EVNTSEL0_ENABLE;
-	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd->check_bit = 1ULL<<39;
 	return 1;
 fail1:
-	release_perfctr_nmi(nmi_perfctr_msr);
+	release_perfctr_nmi(perfctr_msr);
 fail:
 	return 0;
 }
 
+static void stop_p6_watchdog(void)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
+}
+
+/* Note that these events don't tick when the CPU idles. This means
+   the frequency varies with CPU load. */
+
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
+#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
+#define P4_ESCR_OS		(1<<3)
+#define P4_ESCR_USR		(1<<2)
+#define P4_CCCR_OVF_PMI0	(1<<26)
+#define P4_CCCR_OVF_PMI1	(1<<27)
+#define P4_CCCR_THRESHOLD(N)	((N)<<20)
+#define P4_CCCR_COMPLEMENT	(1<<19)
+#define P4_CCCR_COMPARE		(1<<18)
+#define P4_CCCR_REQUIRED	(3<<16)
+#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
+#define P4_CCCR_ENABLE		(1<<12)
+#define P4_CCCR_OVF		(1<<31)
+/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+   CRU_ESCR0 (with any non-null event selector) through a complemented
+   max threshold. [IA32-Vol3, Section 14.9.9] */
+
 static int setup_p4_watchdog(void)
 {
+	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
+	unsigned int evntsel, cccr_val;
 	unsigned int misc_enable, dummy;
+	unsigned int ht_num;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
-	rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
+	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
 	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
 		return 0;
 
-	nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
-	nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
 #ifdef CONFIG_SMP
-	if (smp_num_siblings == 2)
-		nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
+	/* detect which hyperthread we are on */
+	if (smp_num_siblings == 2) {
+		unsigned int ebx, apicid;
+
+		ebx = cpuid_ebx(1);
+		apicid = (ebx >> 24) & 0xff;
+		ht_num = apicid & 1;
+	} else
 #endif
+		ht_num = 0;
+
+	/* performance counters are shared resources
+	 * assign each hyperthread its own set
+	 * (re-use the ESCR0 register, seems safe
+	 * and keeps the cccr_val the same)
+	 */
+	if (!ht_num) {
+		/* logical cpu 0 */
+		perfctr_msr = MSR_P4_IQ_PERFCTR0;
+		evntsel_msr = MSR_P4_CRU_ESCR0;
+		cccr_msr = MSR_P4_IQ_CCCR0;
+		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
+	} else {
+		/* logical cpu 1 */
+		perfctr_msr = MSR_P4_IQ_PERFCTR1;
+		evntsel_msr = MSR_P4_CRU_ESCR0;
+		cccr_msr = MSR_P4_IQ_CCCR1;
+		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
+	}
 
-	if (!reserve_perfctr_nmi(nmi_perfctr_msr))
+	if (!reserve_perfctr_nmi(perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
+	if (!reserve_evntsel_nmi(evntsel_msr))
 		goto fail1;
 
-	wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
-	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
-	write_watchdog_counter("P4_IQ_COUNTER0");
+	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
+		| P4_ESCR_OS
+		| P4_ESCR_USR;
+
+	cccr_val |= P4_CCCR_THRESHOLD(15)
+		 | P4_CCCR_COMPLEMENT
+		 | P4_CCCR_COMPARE
+		 | P4_CCCR_REQUIRED;
+
+	wrmsr(evntsel_msr, evntsel, 0);
+	wrmsr(cccr_msr, cccr_val, 0);
+	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
+	cccr_val |= P4_CCCR_ENABLE;
+	wrmsr(cccr_msr, cccr_val, 0);
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = cccr_msr;
+	wd->check_bit = 1ULL<<39;
 	return 1;
 fail1:
-	release_perfctr_nmi(nmi_perfctr_msr);
+	release_perfctr_nmi(perfctr_msr);
 fail:
 	return 0;
 }
 
-void setup_apic_nmi_watchdog (void)
-{
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
-			return;
-		if (!setup_k7_watchdog())
-			return;
-		break;
-	case X86_VENDOR_INTEL:
-		switch (boot_cpu_data.x86) {
-		case 6:
-			if (boot_cpu_data.x86_model > 0xd)
-				return;
-
-			if(!setup_p6_watchdog())
-				return;
-			break;
-		case 15:
-			if (boot_cpu_data.x86_model > 0x4)
-				return;
-
-			if (!setup_p4_watchdog())
-				return;
-			break;
-		}
-		break;
-	default:
-		return;
-	}
-	lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
-	nmi_active = 1;
+static void stop_p4_watchdog(void)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	wrmsr(wd->cccr_msr, 0, 0);
+	wrmsr(wd->evntsel_msr, 0, 0);
+
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
+}
+
+void setup_apic_nmi_watchdog (void *unused)
+{
+	/* only support LOCAL and IO APICs for now */
+	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
+	    (nmi_watchdog != NMI_IO_APIC))
+		return;
+
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_AMD:
+			if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
+				return;
+			if (!setup_k7_watchdog())
+				return;
+			break;
+		case X86_VENDOR_INTEL:
+			switch (boot_cpu_data.x86) {
+			case 6:
+				if (boot_cpu_data.x86_model > 0xd)
+					return;
+
+				if (!setup_p6_watchdog())
+					return;
+				break;
+			case 15:
+				if (boot_cpu_data.x86_model > 0x4)
+					return;
+
+				if (!setup_p4_watchdog())
+					return;
+				break;
+			default:
+				return;
+			}
+			break;
+		default:
+			return;
+		}
+	}
+	__get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1;
+	atomic_inc(&nmi_active);
+}
+
+static void stop_apic_nmi_watchdog(void *unused)
+{
+	/* only support LOCAL and IO APICs for now */
+	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
+	    (nmi_watchdog != NMI_IO_APIC))
+		return;
+
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_AMD:
+			stop_k7_watchdog();
+			break;
+		case X86_VENDOR_INTEL:
+			switch (boot_cpu_data.x86) {
+			case 6:
+				if (boot_cpu_data.x86_model > 0xd)
+					break;
+				stop_p6_watchdog();
+				break;
+			case 15:
+				if (boot_cpu_data.x86_model > 0x4)
+					break;
+				stop_p4_watchdog();
+				break;
+			}
+			break;
+		default:
+			return;
+		}
+	}
+	__get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0;
+	atomic_dec(&nmi_active);
 }
 
 /*
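A worked example of the arithmetic in write_watchdog_counter(): the counter
is loaded with -(cpu_khz * 1000 / nmi_hz) so it overflows, and raises the
NMI, after one watchdog period of counted events. On a hypothetical 2 GHz
CPU with nmi_hz == 1000:

	u64 count = (u64)cpu_khz * 1000;	/* 2,000,000,000 events/sec */
	do_div(count, nmi_hz);			/* -> 2,000,000 per period */
	wrmsrl(perfctr_msr, 0 - count);		/* overflows ~1 ms later */

Since the chosen events stop counting while the CPU idles, the real NMI rate
drops under light load, which is why the tick handler tolerates several
quiet periods before declaring a lockup.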
@@ -635,7 +781,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
 
 extern void die_nmi(struct pt_regs *, const char *msg);
 
-void nmi_watchdog_tick (struct pt_regs * regs)
+void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
 {
 
 	/*
@@ -644,11 +790,21 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 	 * smp_processor_id().
 	 */
 	unsigned int sum;
+	int touched = 0;
 	int cpu = smp_processor_id();
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+	u64 dummy;
+
+	/* check for other users first */
+	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+			== NOTIFY_STOP) {
+		touched = 1;
+	}
 
 	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
 
-	if (last_irq_sums[cpu] == sum) {
+	/* if the apic timer isn't firing, this cpu isn't doing much */
+	if (!touched && last_irq_sums[cpu] == sum) {
 		/*
 		 * Ayiee, looks like this CPU is stuck ...
 		 * wait a few IRQs (5 seconds) before doing the oops ...
@@ -663,26 +819,41 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 		last_irq_sums[cpu] = sum;
 		alert_counter[cpu] = 0;
 	}
-	if (nmi_perfctr_msr) {
-		if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
-			/*
-			 * P4 quirks:
-			 * - An overflown perfctr will assert its interrupt
-			 *   until the OVF flag in its CCCR is cleared.
-			 * - LVTPC is masked on interrupt and must be
-			 *   unmasked by the LVTPC handler.
-			 */
-			wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
-			apic_write(APIC_LVTPC, APIC_DM_NMI);
-		}
-		else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
-			/* Only P6 based Pentium M need to re-unmask
-			 * the apic vector but it doesn't hurt
-			 * other P6 variant */
-			apic_write(APIC_LVTPC, APIC_DM_NMI);
-		}
-		write_watchdog_counter(NULL);
-	}
+	/* see if the nmi watchdog went off */
+	if (wd->enabled) {
+		if (nmi_watchdog == NMI_LOCAL_APIC) {
+			rdmsrl(wd->perfctr_msr, dummy);
+			if (dummy & wd->check_bit){
+				/* this wasn't a watchdog timer interrupt */
+				goto done;
+			}
+
+			/* only Intel P4 uses the cccr msr */
+			if (wd->cccr_msr != 0) {
+				/*
+				 * P4 quirks:
+				 * - An overflown perfctr will assert its interrupt
+				 *   until the OVF flag in its CCCR is cleared.
+				 * - LVTPC is masked on interrupt and must be
+				 *   unmasked by the LVTPC handler.
+				 */
+				rdmsrl(wd->cccr_msr, dummy);
+				dummy &= ~P4_CCCR_OVF;
+				wrmsrl(wd->cccr_msr, dummy);
+				apic_write(APIC_LVTPC, APIC_DM_NMI);
+			}
+			else if (wd->perfctr_msr == MSR_P6_PERFCTR0) {
+				/* Only P6 based Pentium M need to re-unmask
+				 * the apic vector but it doesn't hurt
+				 * other P6 variant */
+				apic_write(APIC_LVTPC, APIC_DM_NMI);
+			}
+			/* start the cycle over again */
+			write_watchdog_counter(wd->perfctr_msr, NULL);
+		}
+	}
+done:
+	return;
 }
 
 #ifdef CONFIG_SYSCTL
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 7e9edafffd8a..3a07b2677e2a 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -724,7 +724,7 @@ static void default_do_nmi(struct pt_regs * regs)
 	 * so it must be the NMI watchdog.
 	 */
 	if (nmi_watchdog) {
-		nmi_watchdog_tick(regs);
+		nmi_watchdog_tick(regs, reason);
 		return;
 	}
 #endif
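default_do_nmi() now forwards the chipset reason byte, which
nmi_watchdog_tick() feeds to the die-chain; any handler registered with
register_die_notifier() can claim the NMI by returning NOTIFY_STOP, and the
watchdog then counts the tick as handled rather than as a potential lockup.
A hypothetical consumer (my_nmi_is_mine() stands in for whatever ownership
test a subsystem would use):

	static int my_nmi_handler(struct notifier_block *nb,
				  unsigned long cmd, void *data)
	{
		if (cmd == DIE_NMI && my_nmi_is_mine())
			return NOTIFY_STOP;	/* consumed; watchdog skips it */
		return NOTIFY_DONE;
	}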
diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c
index 930a1127bb30..a33a73bb502d 100644
--- a/arch/i386/oprofile/nmi_timer_int.c
+++ b/arch/i386/oprofile/nmi_timer_int.c
@@ -42,9 +42,7 @@ static void timer_stop(void)
 
 int __init op_nmi_timer_init(struct oprofile_operations * ops)
 {
-	extern int nmi_active;
-
-	if (nmi_active <= 0)
+	if (atomic_read(&nmi_active) <= 0)
 		return -ENODEV;
 
 	ops->start = timer_start;
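For outside users such as oprofile the exported semantics are:
atomic_read(&nmi_active) > 0 means that many CPUs have a perfctr-driven
watchdog ticking, 0 means it is off but could be enabled, and a negative
value means check_nmi_watchdog() shut it down for good. The NMI-timer
fallback only makes sense in the first case, hence the gate above:

	if (atomic_read(&nmi_active) <= 0)
		return -ENODEV;	/* no NMI source to piggy-back on */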
diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h
index 27fc9e6f630e..4cda6801ecb8 100644
--- a/include/asm-i386/nmi.h
+++ b/include/asm-i386/nmi.h
@@ -32,13 +32,14 @@ extern void release_perfctr_nmi(unsigned int);
 extern int reserve_evntsel_nmi(unsigned int);
 extern void release_evntsel_nmi(unsigned int);
 
-extern void setup_apic_nmi_watchdog (void);
+extern void setup_apic_nmi_watchdog (void *);
 extern int reserve_lapic_nmi(void);
 extern void release_lapic_nmi(void);
 extern void disable_timer_nmi_watchdog(void);
 extern void enable_timer_nmi_watchdog(void);
-extern void nmi_watchdog_tick (struct pt_regs * regs);
+extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
 
+extern atomic_t nmi_active;
 extern unsigned int nmi_watchdog;
 #define NMI_DEFAULT	-1
 #define NMI_NONE	0