diff options
author | Don Zickus <dzickus@redhat.com> | 2006-09-26 04:52:26 -0400 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2006-09-26 04:52:26 -0400 |
commit | b7471c6da94d30d3deadc55986cc38d1ff57f9ca (patch) | |
tree | 6aa23314273763acccbe9ddd0b8bd442edde0509 /arch/i386/kernel/nmi.c | |
parent | f2802e7f571c05f9a901b1f5bd144aa730ccc88e (diff) |
[PATCH] i386: Add SMP support on i386 to reservation framework
This patch includes the changes to make the NMI watchdog on i386 SMP aware.
A bunch of code was moved around to make it simpler to read. In addition,
it is now possible to determine if a particular NMI was the result of the
watchdog or not. This feature allows the kernel to filter out unknown NMIs
more easily.
Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Diffstat (limited to 'arch/i386/kernel/nmi.c')
-rw-r--r-- | arch/i386/kernel/nmi.c | 537 |
1 files changed, 354 insertions, 183 deletions
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 5d58dfeacd59..d88004343034 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
@@ -24,16 +24,10 @@ | |||
24 | 24 | ||
25 | #include <asm/smp.h> | 25 | #include <asm/smp.h> |
26 | #include <asm/nmi.h> | 26 | #include <asm/nmi.h> |
27 | #include <asm/kdebug.h> | ||
27 | 28 | ||
28 | #include "mach_traps.h" | 29 | #include "mach_traps.h" |
29 | 30 | ||
30 | unsigned int nmi_watchdog = NMI_NONE; | ||
31 | extern int unknown_nmi_panic; | ||
32 | static unsigned int nmi_hz = HZ; | ||
33 | static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ | ||
34 | static unsigned int nmi_p4_cccr_val; | ||
35 | extern void show_registers(struct pt_regs *regs); | ||
36 | |||
37 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: | 31 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: |
38 | * evtsel_nmi_owner tracks the ownership of the event selection | 32 | * evtsel_nmi_owner tracks the ownership of the event selection |
39 | * - different performance counters/ event selection may be reserved for | 33 | * - different performance counters/ event selection may be reserved for |
@@ -63,51 +57,31 @@ static unsigned int lapic_nmi_owner; | |||
63 | #define LAPIC_NMI_RESERVED (1<<1) | 57 | #define LAPIC_NMI_RESERVED (1<<1) |
64 | 58 | ||
65 | /* nmi_active: | 59 | /* nmi_active: |
66 | * +1: the lapic NMI watchdog is active, but can be disabled | 60 | * >0: the lapic NMI watchdog is active, but can be disabled |
67 | * 0: the lapic NMI watchdog has not been set up, and cannot | 61 | * <0: the lapic NMI watchdog has not been set up, and cannot |
68 | * be enabled | 62 | * be enabled |
69 | * -1: the lapic NMI watchdog is disabled, but can be enabled | 63 | * 0: the lapic NMI watchdog is disabled, but can be enabled |
70 | */ | 64 | */ |
71 | int nmi_active; | 65 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ |
72 | 66 | ||
73 | #define K7_EVNTSEL_ENABLE (1 << 22) | 67 | unsigned int nmi_watchdog = NMI_DEFAULT; |
74 | #define K7_EVNTSEL_INT (1 << 20) | 68 | static unsigned int nmi_hz = HZ; |
75 | #define K7_EVNTSEL_OS (1 << 17) | ||
76 | #define K7_EVNTSEL_USR (1 << 16) | ||
77 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
78 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
79 | 69 | ||
80 | #define P6_EVNTSEL0_ENABLE (1 << 22) | 70 | struct nmi_watchdog_ctlblk { |
81 | #define P6_EVNTSEL_INT (1 << 20) | 71 | int enabled; |
82 | #define P6_EVNTSEL_OS (1 << 17) | 72 | u64 check_bit; |
83 | #define P6_EVNTSEL_USR (1 << 16) | 73 | unsigned int cccr_msr; |
84 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | 74 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ |
85 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | 75 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ |
76 | }; | ||
77 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
86 | 78 | ||
87 | #define MSR_P4_MISC_ENABLE 0x1A0 | 79 | /* local prototypes */ |
88 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | 80 | static void stop_apic_nmi_watchdog(void *unused); |
89 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) | 81 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); |
90 | #define MSR_P4_PERFCTR0 0x300 | 82 | |
91 | #define MSR_P4_CCCR0 0x360 | 83 | extern void show_registers(struct pt_regs *regs); |
92 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | 84 | extern int unknown_nmi_panic; |
93 | #define P4_ESCR_OS (1<<3) | ||
94 | #define P4_ESCR_USR (1<<2) | ||
95 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
96 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
97 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
98 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
99 | #define P4_CCCR_COMPARE (1<<18) | ||
100 | #define P4_CCCR_REQUIRED (3<<16) | ||
101 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
102 | #define P4_CCCR_ENABLE (1<<12) | ||
103 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
104 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
105 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
106 | #define MSR_P4_IQ_COUNTER0 0x30C | ||
107 | #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) | ||
108 | #define P4_NMI_IQ_CCCR0 \ | ||
109 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ | ||
110 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) | ||
111 | 85 | ||
112 | /* converts an msr to an appropriate reservation bit */ | 86 | /* converts an msr to an appropriate reservation bit */ |
113 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | 87 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) |
@@ -208,6 +182,17 @@ void release_evntsel_nmi(unsigned int msr) | |||
208 | clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]); | 182 | clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]); |
209 | } | 183 | } |
210 | 184 | ||
185 | static __cpuinit inline int nmi_known_cpu(void) | ||
186 | { | ||
187 | switch (boot_cpu_data.x86_vendor) { | ||
188 | case X86_VENDOR_AMD: | ||
189 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | ||
190 | case X86_VENDOR_INTEL: | ||
191 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | ||
192 | } | ||
193 | return 0; | ||
194 | } | ||
195 | |||
211 | #ifdef CONFIG_SMP | 196 | #ifdef CONFIG_SMP |
212 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when | 197 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when |
213 | * the CPU is idle. To make sure the NMI watchdog really ticks on all | 198 | * the CPU is idle. To make sure the NMI watchdog really ticks on all |
@@ -234,7 +219,10 @@ static int __init check_nmi_watchdog(void) | |||
234 | unsigned int *prev_nmi_count; | 219 | unsigned int *prev_nmi_count; |
235 | int cpu; | 220 | int cpu; |
236 | 221 | ||
237 | if (nmi_watchdog == NMI_NONE) | 222 | if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) |
223 | return 0; | ||
224 | |||
225 | if (!atomic_read(&nmi_active)) | ||
238 | return 0; | 226 | return 0; |
239 | 227 | ||
240 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); | 228 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
@@ -258,18 +246,22 @@ static int __init check_nmi_watchdog(void) | |||
258 | if (!cpu_isset(cpu, cpu_callin_map)) | 246 | if (!cpu_isset(cpu, cpu_callin_map)) |
259 | continue; | 247 | continue; |
260 | #endif | 248 | #endif |
249 | if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) | ||
250 | continue; | ||
261 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { | 251 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { |
262 | endflag = 1; | ||
263 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | 252 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", |
264 | cpu, | 253 | cpu, |
265 | prev_nmi_count[cpu], | 254 | prev_nmi_count[cpu], |
266 | nmi_count(cpu)); | 255 | nmi_count(cpu)); |
267 | nmi_active = 0; | 256 | per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; |
268 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; | 257 | atomic_dec(&nmi_active); |
269 | kfree(prev_nmi_count); | ||
270 | return -1; | ||
271 | } | 258 | } |
272 | } | 259 | } |
260 | if (!atomic_read(&nmi_active)) { | ||
261 | kfree(prev_nmi_count); | ||
262 | atomic_set(&nmi_active, -1); | ||
263 | return -1; | ||
264 | } | ||
273 | endflag = 1; | 265 | endflag = 1; |
274 | printk("OK.\n"); | 266 | printk("OK.\n"); |
275 | 267 | ||
@@ -290,31 +282,16 @@ static int __init setup_nmi_watchdog(char *str) | |||
290 | 282 | ||
291 | get_option(&str, &nmi); | 283 | get_option(&str, &nmi); |
292 | 284 | ||
293 | if (nmi >= NMI_INVALID) | 285 | if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) |
294 | return 0; | 286 | return 0; |
295 | if (nmi == NMI_NONE) | ||
296 | nmi_watchdog = nmi; | ||
297 | /* | 287 | /* |
298 | * If any other x86 CPU has a local APIC, then | 288 | * If any other x86 CPU has a local APIC, then |
299 | * please test the NMI stuff there and send me the | 289 | * please test the NMI stuff there and send me the |
300 | * missing bits. Right now Intel P6/P4 and AMD K7 only. | 290 | * missing bits. Right now Intel P6/P4 and AMD K7 only. |
301 | */ | 291 | */ |
302 | if ((nmi == NMI_LOCAL_APIC) && | 292 | if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0)) |
303 | (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | 293 | return 0; /* no lapic support */ |
304 | (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) | 294 | nmi_watchdog = nmi; |
305 | nmi_watchdog = nmi; | ||
306 | if ((nmi == NMI_LOCAL_APIC) && | ||
307 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && | ||
308 | (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) | ||
309 | nmi_watchdog = nmi; | ||
310 | /* | ||
311 | * We can enable the IO-APIC watchdog | ||
312 | * unconditionally. | ||
313 | */ | ||
314 | if (nmi == NMI_IO_APIC) { | ||
315 | nmi_active = 1; | ||
316 | nmi_watchdog = nmi; | ||
317 | } | ||
318 | return 1; | 295 | return 1; |
319 | } | 296 | } |
320 | 297 | ||
@@ -322,41 +299,30 @@ __setup("nmi_watchdog=", setup_nmi_watchdog); | |||
322 | 299 | ||
323 | static void disable_lapic_nmi_watchdog(void) | 300 | static void disable_lapic_nmi_watchdog(void) |
324 | { | 301 | { |
325 | if (nmi_active <= 0) | 302 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); |
303 | |||
304 | if (atomic_read(&nmi_active) <= 0) | ||
326 | return; | 305 | return; |
327 | switch (boot_cpu_data.x86_vendor) { | ||
328 | case X86_VENDOR_AMD: | ||
329 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); | ||
330 | break; | ||
331 | case X86_VENDOR_INTEL: | ||
332 | switch (boot_cpu_data.x86) { | ||
333 | case 6: | ||
334 | if (boot_cpu_data.x86_model > 0xd) | ||
335 | break; | ||
336 | 306 | ||
337 | wrmsr(MSR_P6_EVNTSEL0, 0, 0); | 307 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); |
338 | break; | ||
339 | case 15: | ||
340 | if (boot_cpu_data.x86_model > 0x4) | ||
341 | break; | ||
342 | 308 | ||
343 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); | 309 | BUG_ON(atomic_read(&nmi_active) != 0); |
344 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); | ||
345 | break; | ||
346 | } | ||
347 | break; | ||
348 | } | ||
349 | nmi_active = -1; | ||
350 | /* tell do_nmi() and others that we're not active any more */ | ||
351 | nmi_watchdog = 0; | ||
352 | } | 310 | } |
353 | 311 | ||
354 | static void enable_lapic_nmi_watchdog(void) | 312 | static void enable_lapic_nmi_watchdog(void) |
355 | { | 313 | { |
356 | if (nmi_active < 0) { | 314 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); |
357 | nmi_watchdog = NMI_LOCAL_APIC; | 315 | |
358 | setup_apic_nmi_watchdog(); | 316 | /* are we already enabled */ |
359 | } | 317 | if (atomic_read(&nmi_active) != 0) |
318 | return; | ||
319 | |||
320 | /* are we lapic aware */ | ||
321 | if (nmi_known_cpu() <= 0) | ||
322 | return; | ||
323 | |||
324 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); | ||
325 | touch_nmi_watchdog(); | ||
360 | } | 326 | } |
361 | 327 | ||
362 | int reserve_lapic_nmi(void) | 328 | int reserve_lapic_nmi(void) |
@@ -388,20 +354,25 @@ void release_lapic_nmi(void) | |||
388 | 354 | ||
389 | void disable_timer_nmi_watchdog(void) | 355 | void disable_timer_nmi_watchdog(void) |
390 | { | 356 | { |
391 | if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) | 357 | BUG_ON(nmi_watchdog != NMI_IO_APIC); |
358 | |||
359 | if (atomic_read(&nmi_active) <= 0) | ||
392 | return; | 360 | return; |
393 | 361 | ||
394 | unset_nmi_callback(); | 362 | disable_irq(0); |
395 | nmi_active = -1; | 363 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); |
396 | nmi_watchdog = NMI_NONE; | 364 | |
365 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
397 | } | 366 | } |
398 | 367 | ||
399 | void enable_timer_nmi_watchdog(void) | 368 | void enable_timer_nmi_watchdog(void) |
400 | { | 369 | { |
401 | if (nmi_active < 0) { | 370 | BUG_ON(nmi_watchdog != NMI_IO_APIC); |
402 | nmi_watchdog = NMI_IO_APIC; | 371 | |
372 | if (atomic_read(&nmi_active) == 0) { | ||
403 | touch_nmi_watchdog(); | 373 | touch_nmi_watchdog(); |
404 | nmi_active = 1; | 374 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); |
375 | enable_irq(0); | ||
405 | } | 376 | } |
406 | } | 377 | } |
407 | 378 | ||
@@ -411,7 +382,7 @@ static int nmi_pm_active; /* nmi_active before suspend */ | |||
411 | 382 | ||
412 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) | 383 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) |
413 | { | 384 | { |
414 | nmi_pm_active = nmi_active; | 385 | nmi_pm_active = atomic_read(&nmi_active); |
415 | disable_lapic_nmi_watchdog(); | 386 | disable_lapic_nmi_watchdog(); |
416 | return 0; | 387 | return 0; |
417 | } | 388 | } |
@@ -439,7 +410,13 @@ static int __init init_lapic_nmi_sysfs(void) | |||
439 | { | 410 | { |
440 | int error; | 411 | int error; |
441 | 412 | ||
442 | if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) | 413 | /* should really be a BUG_ON but b/c this is an |
414 | * init call, it just doesn't work. -dcz | ||
415 | */ | ||
416 | if (nmi_watchdog != NMI_LOCAL_APIC) | ||
417 | return 0; | ||
418 | |||
419 | if ( atomic_read(&nmi_active) < 0 ) | ||
443 | return 0; | 420 | return 0; |
444 | 421 | ||
445 | error = sysdev_class_register(&nmi_sysclass); | 422 | error = sysdev_class_register(&nmi_sysclass); |
@@ -457,143 +434,312 @@ late_initcall(init_lapic_nmi_sysfs); | |||
457 | * Original code written by Keith Owens. | 434 | * Original code written by Keith Owens. |
458 | */ | 435 | */ |
459 | 436 | ||
460 | static void write_watchdog_counter(const char *descr) | 437 | static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr) |
461 | { | 438 | { |
462 | u64 count = (u64)cpu_khz * 1000; | 439 | u64 count = (u64)cpu_khz * 1000; |
463 | 440 | ||
464 | do_div(count, nmi_hz); | 441 | do_div(count, nmi_hz); |
465 | if(descr) | 442 | if(descr) |
466 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | 443 | Dprintk("setting %s to -0x%08Lx\n", descr, count); |
467 | wrmsrl(nmi_perfctr_msr, 0 - count); | 444 | wrmsrl(perfctr_msr, 0 - count); |
468 | } | 445 | } |
469 | 446 | ||
447 | /* Note that these events don't tick when the CPU idles. This means | ||
448 | the frequency varies with CPU load. */ | ||
449 | |||
450 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
451 | #define K7_EVNTSEL_INT (1 << 20) | ||
452 | #define K7_EVNTSEL_OS (1 << 17) | ||
453 | #define K7_EVNTSEL_USR (1 << 16) | ||
454 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
455 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
456 | |||
470 | static int setup_k7_watchdog(void) | 457 | static int setup_k7_watchdog(void) |
471 | { | 458 | { |
459 | unsigned int perfctr_msr, evntsel_msr; | ||
472 | unsigned int evntsel; | 460 | unsigned int evntsel; |
461 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
473 | 462 | ||
474 | nmi_perfctr_msr = MSR_K7_PERFCTR0; | 463 | perfctr_msr = MSR_K7_PERFCTR0; |
475 | 464 | evntsel_msr = MSR_K7_EVNTSEL0; | |
476 | if (!reserve_perfctr_nmi(nmi_perfctr_msr)) | 465 | if (!reserve_perfctr_nmi(perfctr_msr)) |
477 | goto fail; | 466 | goto fail; |
478 | 467 | ||
479 | if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) | 468 | if (!reserve_evntsel_nmi(evntsel_msr)) |
480 | goto fail1; | 469 | goto fail1; |
481 | 470 | ||
482 | wrmsrl(MSR_K7_PERFCTR0, 0UL); | 471 | wrmsrl(perfctr_msr, 0UL); |
483 | 472 | ||
484 | evntsel = K7_EVNTSEL_INT | 473 | evntsel = K7_EVNTSEL_INT |
485 | | K7_EVNTSEL_OS | 474 | | K7_EVNTSEL_OS |
486 | | K7_EVNTSEL_USR | 475 | | K7_EVNTSEL_USR |
487 | | K7_NMI_EVENT; | 476 | | K7_NMI_EVENT; |
488 | 477 | ||
489 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 478 | /* setup the timer */ |
490 | write_watchdog_counter("K7_PERFCTR0"); | 479 | wrmsr(evntsel_msr, evntsel, 0); |
480 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0"); | ||
491 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 481 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
492 | evntsel |= K7_EVNTSEL_ENABLE; | 482 | evntsel |= K7_EVNTSEL_ENABLE; |
493 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 483 | wrmsr(evntsel_msr, evntsel, 0); |
484 | |||
485 | wd->perfctr_msr = perfctr_msr; | ||
486 | wd->evntsel_msr = evntsel_msr; | ||
487 | wd->cccr_msr = 0; //unused | ||
488 | wd->check_bit = 1ULL<<63; | ||
494 | return 1; | 489 | return 1; |
495 | fail1: | 490 | fail1: |
496 | release_perfctr_nmi(nmi_perfctr_msr); | 491 | release_perfctr_nmi(perfctr_msr); |
497 | fail: | 492 | fail: |
498 | return 0; | 493 | return 0; |
499 | } | 494 | } |
500 | 495 | ||
496 | static void stop_k7_watchdog(void) | ||
497 | { | ||
498 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
499 | |||
500 | wrmsr(wd->evntsel_msr, 0, 0); | ||
501 | |||
502 | release_evntsel_nmi(wd->evntsel_msr); | ||
503 | release_perfctr_nmi(wd->perfctr_msr); | ||
504 | } | ||
505 | |||
506 | #define P6_EVNTSEL0_ENABLE (1 << 22) | ||
507 | #define P6_EVNTSEL_INT (1 << 20) | ||
508 | #define P6_EVNTSEL_OS (1 << 17) | ||
509 | #define P6_EVNTSEL_USR (1 << 16) | ||
510 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | ||
511 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | ||
512 | |||
501 | static int setup_p6_watchdog(void) | 513 | static int setup_p6_watchdog(void) |
502 | { | 514 | { |
515 | unsigned int perfctr_msr, evntsel_msr; | ||
503 | unsigned int evntsel; | 516 | unsigned int evntsel; |
517 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
504 | 518 | ||
505 | nmi_perfctr_msr = MSR_P6_PERFCTR0; | 519 | perfctr_msr = MSR_P6_PERFCTR0; |
506 | 520 | evntsel_msr = MSR_P6_EVNTSEL0; | |
507 | if (!reserve_perfctr_nmi(nmi_perfctr_msr)) | 521 | if (!reserve_perfctr_nmi(perfctr_msr)) |
508 | goto fail; | 522 | goto fail; |
509 | 523 | ||
510 | if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0)) | 524 | if (!reserve_evntsel_nmi(evntsel_msr)) |
511 | goto fail1; | 525 | goto fail1; |
512 | 526 | ||
527 | wrmsrl(perfctr_msr, 0UL); | ||
528 | |||
513 | evntsel = P6_EVNTSEL_INT | 529 | evntsel = P6_EVNTSEL_INT |
514 | | P6_EVNTSEL_OS | 530 | | P6_EVNTSEL_OS |
515 | | P6_EVNTSEL_USR | 531 | | P6_EVNTSEL_USR |
516 | | P6_NMI_EVENT; | 532 | | P6_NMI_EVENT; |
517 | 533 | ||
518 | wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); | 534 | /* setup the timer */ |
519 | write_watchdog_counter("P6_PERFCTR0"); | 535 | wrmsr(evntsel_msr, evntsel, 0); |
536 | write_watchdog_counter(perfctr_msr, "P6_PERFCTR0"); | ||
520 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 537 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
521 | evntsel |= P6_EVNTSEL0_ENABLE; | 538 | evntsel |= P6_EVNTSEL0_ENABLE; |
522 | wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); | 539 | wrmsr(evntsel_msr, evntsel, 0); |
540 | |||
541 | wd->perfctr_msr = perfctr_msr; | ||
542 | wd->evntsel_msr = evntsel_msr; | ||
543 | wd->cccr_msr = 0; //unused | ||
544 | wd->check_bit = 1ULL<<39; | ||
523 | return 1; | 545 | return 1; |
524 | fail1: | 546 | fail1: |
525 | release_perfctr_nmi(nmi_perfctr_msr); | 547 | release_perfctr_nmi(perfctr_msr); |
526 | fail: | 548 | fail: |
527 | return 0; | 549 | return 0; |
528 | } | 550 | } |
529 | 551 | ||
552 | static void stop_p6_watchdog(void) | ||
553 | { | ||
554 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
555 | |||
556 | wrmsr(wd->evntsel_msr, 0, 0); | ||
557 | |||
558 | release_evntsel_nmi(wd->evntsel_msr); | ||
559 | release_perfctr_nmi(wd->perfctr_msr); | ||
560 | } | ||
561 | |||
562 | /* Note that these events don't tick when the CPU idles. This means | ||
563 | the frequency varies with CPU load. */ | ||
564 | |||
565 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | ||
566 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
567 | #define P4_ESCR_OS (1<<3) | ||
568 | #define P4_ESCR_USR (1<<2) | ||
569 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
570 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
571 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
572 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
573 | #define P4_CCCR_COMPARE (1<<18) | ||
574 | #define P4_CCCR_REQUIRED (3<<16) | ||
575 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
576 | #define P4_CCCR_ENABLE (1<<12) | ||
577 | #define P4_CCCR_OVF (1<<31) | ||
578 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
579 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
580 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
581 | |||
530 | static int setup_p4_watchdog(void) | 582 | static int setup_p4_watchdog(void) |
531 | { | 583 | { |
584 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | ||
585 | unsigned int evntsel, cccr_val; | ||
532 | unsigned int misc_enable, dummy; | 586 | unsigned int misc_enable, dummy; |
587 | unsigned int ht_num; | ||
588 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
533 | 589 | ||
534 | rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); | 590 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); |
535 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | 591 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) |
536 | return 0; | 592 | return 0; |
537 | 593 | ||
538 | nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; | ||
539 | nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; | ||
540 | #ifdef CONFIG_SMP | 594 | #ifdef CONFIG_SMP |
541 | if (smp_num_siblings == 2) | 595 | /* detect which hyperthread we are on */ |
542 | nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; | 596 | if (smp_num_siblings == 2) { |
597 | unsigned int ebx, apicid; | ||
598 | |||
599 | ebx = cpuid_ebx(1); | ||
600 | apicid = (ebx >> 24) & 0xff; | ||
601 | ht_num = apicid & 1; | ||
602 | } else | ||
543 | #endif | 603 | #endif |
604 | ht_num = 0; | ||
605 | |||
606 | /* performance counters are shared resources | ||
607 | * assign each hyperthread its own set | ||
608 | * (re-use the ESCR0 register, seems safe | ||
609 | * and keeps the cccr_val the same) | ||
610 | */ | ||
611 | if (!ht_num) { | ||
612 | /* logical cpu 0 */ | ||
613 | perfctr_msr = MSR_P4_IQ_PERFCTR0; | ||
614 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
615 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
616 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
617 | } else { | ||
618 | /* logical cpu 1 */ | ||
619 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
620 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
621 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
622 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | ||
623 | } | ||
544 | 624 | ||
545 | if (!reserve_perfctr_nmi(nmi_perfctr_msr)) | 625 | if (!reserve_perfctr_nmi(perfctr_msr)) |
546 | goto fail; | 626 | goto fail; |
547 | 627 | ||
548 | if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) | 628 | if (!reserve_evntsel_nmi(evntsel_msr)) |
549 | goto fail1; | 629 | goto fail1; |
550 | 630 | ||
551 | wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); | 631 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) |
552 | wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); | 632 | | P4_ESCR_OS |
553 | write_watchdog_counter("P4_IQ_COUNTER0"); | 633 | | P4_ESCR_USR; |
634 | |||
635 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
636 | | P4_CCCR_COMPLEMENT | ||
637 | | P4_CCCR_COMPARE | ||
638 | | P4_CCCR_REQUIRED; | ||
639 | |||
640 | wrmsr(evntsel_msr, evntsel, 0); | ||
641 | wrmsr(cccr_msr, cccr_val, 0); | ||
642 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0"); | ||
554 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 643 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
555 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 644 | cccr_val |= P4_CCCR_ENABLE; |
645 | wrmsr(cccr_msr, cccr_val, 0); | ||
646 | wd->perfctr_msr = perfctr_msr; | ||
647 | wd->evntsel_msr = evntsel_msr; | ||
648 | wd->cccr_msr = cccr_msr; | ||
649 | wd->check_bit = 1ULL<<39; | ||
556 | return 1; | 650 | return 1; |
557 | fail1: | 651 | fail1: |
558 | release_perfctr_nmi(nmi_perfctr_msr); | 652 | release_perfctr_nmi(perfctr_msr); |
559 | fail: | 653 | fail: |
560 | return 0; | 654 | return 0; |
561 | } | 655 | } |
562 | 656 | ||
563 | void setup_apic_nmi_watchdog (void) | 657 | static void stop_p4_watchdog(void) |
564 | { | 658 | { |
565 | switch (boot_cpu_data.x86_vendor) { | 659 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
566 | case X86_VENDOR_AMD: | 660 | |
567 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) | 661 | wrmsr(wd->cccr_msr, 0, 0); |
568 | return; | 662 | wrmsr(wd->evntsel_msr, 0, 0); |
569 | if (!setup_k7_watchdog()) | ||
570 | return; | ||
571 | break; | ||
572 | case X86_VENDOR_INTEL: | ||
573 | switch (boot_cpu_data.x86) { | ||
574 | case 6: | ||
575 | if (boot_cpu_data.x86_model > 0xd) | ||
576 | return; | ||
577 | 663 | ||
578 | if(!setup_p6_watchdog()) | 664 | release_evntsel_nmi(wd->evntsel_msr); |
665 | release_perfctr_nmi(wd->perfctr_msr); | ||
666 | } | ||
667 | |||
668 | void setup_apic_nmi_watchdog (void *unused) | ||
669 | { | ||
670 | /* only support LOCAL and IO APICs for now */ | ||
671 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
672 | (nmi_watchdog != NMI_IO_APIC)) | ||
673 | return; | ||
674 | |||
675 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
676 | switch (boot_cpu_data.x86_vendor) { | ||
677 | case X86_VENDOR_AMD: | ||
678 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) | ||
579 | return; | 679 | return; |
580 | break; | 680 | if (!setup_k7_watchdog()) |
581 | case 15: | ||
582 | if (boot_cpu_data.x86_model > 0x4) | ||
583 | return; | 681 | return; |
682 | break; | ||
683 | case X86_VENDOR_INTEL: | ||
684 | switch (boot_cpu_data.x86) { | ||
685 | case 6: | ||
686 | if (boot_cpu_data.x86_model > 0xd) | ||
687 | return; | ||
688 | |||
689 | if (!setup_p6_watchdog()) | ||
690 | return; | ||
691 | break; | ||
692 | case 15: | ||
693 | if (boot_cpu_data.x86_model > 0x4) | ||
694 | return; | ||
584 | 695 | ||
585 | if (!setup_p4_watchdog()) | 696 | if (!setup_p4_watchdog()) |
697 | return; | ||
698 | break; | ||
699 | default: | ||
586 | return; | 700 | return; |
701 | } | ||
702 | break; | ||
703 | default: | ||
704 | return; | ||
705 | } | ||
706 | } | ||
707 | __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1; | ||
708 | atomic_inc(&nmi_active); | ||
709 | } | ||
710 | |||
711 | static void stop_apic_nmi_watchdog(void *unused) | ||
712 | { | ||
713 | /* only support LOCAL and IO APICs for now */ | ||
714 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
715 | (nmi_watchdog != NMI_IO_APIC)) | ||
716 | return; | ||
717 | |||
718 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
719 | switch (boot_cpu_data.x86_vendor) { | ||
720 | case X86_VENDOR_AMD: | ||
721 | stop_k7_watchdog(); | ||
722 | break; | ||
723 | case X86_VENDOR_INTEL: | ||
724 | switch (boot_cpu_data.x86) { | ||
725 | case 6: | ||
726 | if (boot_cpu_data.x86_model > 0xd) | ||
727 | break; | ||
728 | stop_p6_watchdog(); | ||
729 | break; | ||
730 | case 15: | ||
731 | if (boot_cpu_data.x86_model > 0x4) | ||
732 | break; | ||
733 | stop_p4_watchdog(); | ||
734 | break; | ||
735 | } | ||
587 | break; | 736 | break; |
588 | default: | 737 | default: |
589 | return; | 738 | return; |
590 | } | 739 | } |
591 | break; | ||
592 | default: | ||
593 | return; | ||
594 | } | 740 | } |
595 | lapic_nmi_owner = LAPIC_NMI_WATCHDOG; | 741 | __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0; |
596 | nmi_active = 1; | 742 | atomic_dec(&nmi_active); |
597 | } | 743 | } |
598 | 744 | ||
599 | /* | 745 | /* |
@@ -635,7 +781,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog); | |||
635 | 781 | ||
636 | extern void die_nmi(struct pt_regs *, const char *msg); | 782 | extern void die_nmi(struct pt_regs *, const char *msg); |
637 | 783 | ||
638 | void nmi_watchdog_tick (struct pt_regs * regs) | 784 | void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) |
639 | { | 785 | { |
640 | 786 | ||
641 | /* | 787 | /* |
@@ -644,11 +790,21 @@ void nmi_watchdog_tick (struct pt_regs * regs) | |||
644 | * smp_processor_id(). | 790 | * smp_processor_id(). |
645 | */ | 791 | */ |
646 | unsigned int sum; | 792 | unsigned int sum; |
793 | int touched = 0; | ||
647 | int cpu = smp_processor_id(); | 794 | int cpu = smp_processor_id(); |
795 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
796 | u64 dummy; | ||
797 | |||
798 | /* check for other users first */ | ||
799 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | ||
800 | == NOTIFY_STOP) { | ||
801 | touched = 1; | ||
802 | } | ||
648 | 803 | ||
649 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; | 804 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; |
650 | 805 | ||
651 | if (last_irq_sums[cpu] == sum) { | 806 | /* if the apic timer isn't firing, this cpu isn't doing much */ |
807 | if (!touched && last_irq_sums[cpu] == sum) { | ||
652 | /* | 808 | /* |
653 | * Ayiee, looks like this CPU is stuck ... | 809 | * Ayiee, looks like this CPU is stuck ... |
654 | * wait a few IRQs (5 seconds) before doing the oops ... | 810 | * wait a few IRQs (5 seconds) before doing the oops ... |
@@ -663,26 +819,41 @@ void nmi_watchdog_tick (struct pt_regs * regs) | |||
663 | last_irq_sums[cpu] = sum; | 819 | last_irq_sums[cpu] = sum; |
664 | alert_counter[cpu] = 0; | 820 | alert_counter[cpu] = 0; |
665 | } | 821 | } |
666 | if (nmi_perfctr_msr) { | 822 | /* see if the nmi watchdog went off */ |
667 | if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { | 823 | if (wd->enabled) { |
668 | /* | 824 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
669 | * P4 quirks: | 825 | rdmsrl(wd->perfctr_msr, dummy); |
670 | * - An overflown perfctr will assert its interrupt | 826 | if (dummy & wd->check_bit){ |
671 | * until the OVF flag in its CCCR is cleared. | 827 | /* this wasn't a watchdog timer interrupt */ |
672 | * - LVTPC is masked on interrupt and must be | 828 | goto done; |
673 | * unmasked by the LVTPC handler. | 829 | } |
674 | */ | 830 | |
675 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 831 | /* only Intel P4 uses the cccr msr */ |
676 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 832 | if (wd->cccr_msr != 0) { |
677 | } | 833 | /* |
678 | else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { | 834 | * P4 quirks: |
679 | /* Only P6 based Pentium M need to re-unmask | 835 | * - An overflown perfctr will assert its interrupt |
680 | * the apic vector but it doesn't hurt | 836 | * until the OVF flag in its CCCR is cleared. |
681 | * other P6 variant */ | 837 | * - LVTPC is masked on interrupt and must be |
682 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 838 | * unmasked by the LVTPC handler. |
839 | */ | ||
840 | rdmsrl(wd->cccr_msr, dummy); | ||
841 | dummy &= ~P4_CCCR_OVF; | ||
842 | wrmsrl(wd->cccr_msr, dummy); | ||
843 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
844 | } | ||
845 | else if (wd->perfctr_msr == MSR_P6_PERFCTR0) { | ||
846 | /* Only P6 based Pentium M need to re-unmask | ||
847 | * the apic vector but it doesn't hurt | ||
848 | * other P6 variant */ | ||
849 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
850 | } | ||
851 | /* start the cycle over again */ | ||
852 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
683 | } | 853 | } |
684 | write_watchdog_counter(NULL); | ||
685 | } | 854 | } |
855 | done: | ||
856 | return; | ||
686 | } | 857 | } |
687 | 858 | ||
688 | #ifdef CONFIG_SYSCTL | 859 | #ifdef CONFIG_SYSCTL |