diff options
-rw-r--r-- | arch/x86_64/kernel/apic.c | 3 | ||||
-rw-r--r-- | arch/x86_64/kernel/nmi.c | 250 | ||||
-rw-r--r-- | arch/x86_64/kernel/smpboot.c | 3 | ||||
-rw-r--r-- | include/asm-x86_64/nmi.h | 2 |
4 files changed, 179 insertions, 79 deletions
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 7f0b44e4a6e3..f8e6cc4fecd4 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/mpspec.h> | 33 | #include <asm/mpspec.h> |
34 | #include <asm/pgalloc.h> | 34 | #include <asm/pgalloc.h> |
35 | #include <asm/mach_apic.h> | 35 | #include <asm/mach_apic.h> |
36 | #include <asm/nmi.h> | ||
36 | 37 | ||
37 | int apic_verbosity; | 38 | int apic_verbosity; |
38 | 39 | ||
@@ -1056,7 +1057,7 @@ int __init APIC_init_uniprocessor (void) | |||
1056 | nr_ioapics = 0; | 1057 | nr_ioapics = 0; |
1057 | #endif | 1058 | #endif |
1058 | setup_boot_APIC_clock(); | 1059 | setup_boot_APIC_clock(); |
1059 | 1060 | check_nmi_watchdog(); | |
1060 | return 0; | 1061 | return 0; |
1061 | } | 1062 | } |
1062 | 1063 | ||
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index ec13eb97e8e6..31c0f2e6ac91 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/msr.h> | 33 | #include <asm/msr.h> |
34 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
35 | #include <asm/kdebug.h> | 35 | #include <asm/kdebug.h> |
36 | #include <asm/local.h> | ||
36 | 37 | ||
37 | /* | 38 | /* |
38 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: | 39 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: |
@@ -59,7 +60,8 @@ int panic_on_timeout; | |||
59 | 60 | ||
60 | unsigned int nmi_watchdog = NMI_DEFAULT; | 61 | unsigned int nmi_watchdog = NMI_DEFAULT; |
61 | static unsigned int nmi_hz = HZ; | 62 | static unsigned int nmi_hz = HZ; |
62 | unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ | 63 | static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ |
64 | static unsigned int nmi_p4_cccr_val; | ||
63 | 65 | ||
64 | /* Note that these events don't tick when the CPU idles. This means | 66 | /* Note that these events don't tick when the CPU idles. This means |
65 | the frequency varies with CPU load. */ | 67 | the frequency varies with CPU load. */ |
@@ -71,67 +73,87 @@ unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ | |||
71 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | 73 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 |
72 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | 74 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING |
73 | 75 | ||
74 | #define P6_EVNTSEL0_ENABLE (1 << 22) | 76 | #define MSR_P4_MISC_ENABLE 0x1A0 |
75 | #define P6_EVNTSEL_INT (1 << 20) | 77 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) |
76 | #define P6_EVNTSEL_OS (1 << 17) | 78 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) |
77 | #define P6_EVNTSEL_USR (1 << 16) | 79 | #define MSR_P4_PERFCTR0 0x300 |
78 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | 80 | #define MSR_P4_CCCR0 0x360 |
79 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | 81 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) |
82 | #define P4_ESCR_OS (1<<3) | ||
83 | #define P4_ESCR_USR (1<<2) | ||
84 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
85 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
86 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
87 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
88 | #define P4_CCCR_COMPARE (1<<18) | ||
89 | #define P4_CCCR_REQUIRED (3<<16) | ||
90 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
91 | #define P4_CCCR_ENABLE (1<<12) | ||
92 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
93 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
94 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
95 | #define MSR_P4_IQ_COUNTER0 0x30C | ||
96 | #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) | ||
97 | #define P4_NMI_IQ_CCCR0 \ | ||
98 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ | ||
99 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) | ||
100 | |||
101 | static __init inline int nmi_known_cpu(void) | ||
102 | { | ||
103 | switch (boot_cpu_data.x86_vendor) { | ||
104 | case X86_VENDOR_AMD: | ||
105 | return boot_cpu_data.x86 == 15; | ||
106 | case X86_VENDOR_INTEL: | ||
107 | return boot_cpu_data.x86 == 15; | ||
108 | } | ||
109 | return 0; | ||
110 | } | ||
80 | 111 | ||
81 | /* Run after command line and cpu_init init, but before all other checks */ | 112 | /* Run after command line and cpu_init init, but before all other checks */ |
82 | void __init nmi_watchdog_default(void) | 113 | void __init nmi_watchdog_default(void) |
83 | { | 114 | { |
84 | if (nmi_watchdog != NMI_DEFAULT) | 115 | if (nmi_watchdog != NMI_DEFAULT) |
85 | return; | 116 | return; |
86 | 117 | if (nmi_known_cpu()) | |
87 | /* For some reason the IO APIC watchdog doesn't work on the AMD | 118 | nmi_watchdog = NMI_LOCAL_APIC; |
88 | 8111 chipset. For now switch to local APIC mode using | 119 | else |
89 | perfctr0 there. On Intel CPUs we don't have code to handle | ||
90 | the perfctr and the IO-APIC seems to work, so use that. */ | ||
91 | |||
92 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | ||
93 | nmi_watchdog = NMI_LOCAL_APIC; | ||
94 | printk(KERN_INFO | ||
95 | "Using local APIC NMI watchdog using perfctr0\n"); | ||
96 | } else { | ||
97 | printk(KERN_INFO "Using IO APIC NMI watchdog\n"); | ||
98 | nmi_watchdog = NMI_IO_APIC; | 120 | nmi_watchdog = NMI_IO_APIC; |
99 | } | ||
100 | } | 121 | } |
101 | 122 | ||
102 | /* Why is there no CPUID flag for this? */ | 123 | #ifdef CONFIG_SMP |
103 | static __init int cpu_has_lapic(void) | 124 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when |
125 | * the CPU is idle. To make sure the NMI watchdog really ticks on all | ||
126 | * CPUs during the test make them busy. | ||
127 | */ | ||
128 | static __init void nmi_cpu_busy(void *data) | ||
104 | { | 129 | { |
105 | switch (boot_cpu_data.x86_vendor) { | 130 | volatile int *endflag = data; |
106 | case X86_VENDOR_INTEL: | 131 | local_irq_enable(); |
107 | case X86_VENDOR_AMD: | 132 | /* Intentionally don't use cpu_relax here. This is |
108 | return boot_cpu_data.x86 >= 6; | 133 | to make sure that the performance counter really ticks, |
109 | /* .... add more cpus here or find a different way to figure this out. */ | 134 | even if there is a simulator or similar that catches the |
110 | default: | 135 | pause instruction. On a real HT machine this is fine because |
111 | return 0; | 136 | all other CPUs are busy with "useless" delay loops and don't |
112 | } | 137 | care if they get somewhat less cycles. */ |
138 | while (*endflag == 0) | ||
139 | barrier(); | ||
113 | } | 140 | } |
141 | #endif | ||
114 | 142 | ||
115 | static int __init check_nmi_watchdog (void) | 143 | int __init check_nmi_watchdog (void) |
116 | { | 144 | { |
145 | volatile int endflag = 0; | ||
117 | int *counts; | 146 | int *counts; |
118 | int cpu; | 147 | int cpu; |
119 | 148 | ||
120 | if (nmi_watchdog == NMI_NONE) | 149 | counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
121 | return 0; | 150 | if (!counts) |
151 | return -1; | ||
122 | 152 | ||
123 | if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic()) { | 153 | printk(KERN_INFO "testing NMI watchdog ... "); |
124 | nmi_watchdog = NMI_NONE; | ||
125 | return -1; | ||
126 | } | ||
127 | 154 | ||
128 | counts = kmalloc(NR_CPUS * sizeof(int),GFP_KERNEL); | 155 | if (nmi_watchdog == NMI_LOCAL_APIC) |
129 | if (!counts) { | 156 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); |
130 | nmi_watchdog = NMI_NONE; | ||
131 | return 0; | ||
132 | } | ||
133 | |||
134 | printk(KERN_INFO "Testing NMI watchdog ... "); | ||
135 | 157 | ||
136 | for (cpu = 0; cpu < NR_CPUS; cpu++) | 158 | for (cpu = 0; cpu < NR_CPUS; cpu++) |
137 | counts[cpu] = cpu_pda[cpu].__nmi_count; | 159 | counts[cpu] = cpu_pda[cpu].__nmi_count; |
@@ -139,16 +161,22 @@ static int __init check_nmi_watchdog (void) | |||
139 | mdelay((10*1000)/nmi_hz); // wait 10 ticks | 161 | mdelay((10*1000)/nmi_hz); // wait 10 ticks |
140 | 162 | ||
141 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 163 | for (cpu = 0; cpu < NR_CPUS; cpu++) { |
164 | if (!cpu_online(cpu)) | ||
165 | continue; | ||
142 | if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) { | 166 | if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) { |
143 | printk("CPU#%d: NMI appears to be stuck (%d)!\n", | 167 | endflag = 1; |
168 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | ||
144 | cpu, | 169 | cpu, |
170 | counts[cpu], | ||
145 | cpu_pda[cpu].__nmi_count); | 171 | cpu_pda[cpu].__nmi_count); |
146 | nmi_active = 0; | 172 | nmi_active = 0; |
147 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; | 173 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; |
174 | nmi_perfctr_msr = 0; | ||
148 | kfree(counts); | 175 | kfree(counts); |
149 | return -1; | 176 | return -1; |
150 | } | 177 | } |
151 | } | 178 | } |
179 | endflag = 1; | ||
152 | printk("OK.\n"); | 180 | printk("OK.\n"); |
153 | 181 | ||
154 | /* now that we know it works we can reduce NMI frequency to | 182 | /* now that we know it works we can reduce NMI frequency to |
@@ -159,8 +187,6 @@ static int __init check_nmi_watchdog (void) | |||
159 | kfree(counts); | 187 | kfree(counts); |
160 | return 0; | 188 | return 0; |
161 | } | 189 | } |
162 | /* Have this called later during boot so counters are updating */ | ||
163 | late_initcall(check_nmi_watchdog); | ||
164 | 190 | ||
165 | int __init setup_nmi_watchdog(char *str) | 191 | int __init setup_nmi_watchdog(char *str) |
166 | { | 192 | { |
@@ -178,7 +204,7 @@ int __init setup_nmi_watchdog(char *str) | |||
178 | 204 | ||
179 | if (nmi >= NMI_INVALID) | 205 | if (nmi >= NMI_INVALID) |
180 | return 0; | 206 | return 0; |
181 | nmi_watchdog = nmi; | 207 | nmi_watchdog = nmi; |
182 | return 1; | 208 | return 1; |
183 | } | 209 | } |
184 | 210 | ||
@@ -193,7 +219,10 @@ static void disable_lapic_nmi_watchdog(void) | |||
193 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); | 219 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); |
194 | break; | 220 | break; |
195 | case X86_VENDOR_INTEL: | 221 | case X86_VENDOR_INTEL: |
196 | wrmsr(MSR_IA32_EVNTSEL0, 0, 0); | 222 | if (boot_cpu_data.x86 == 15) { |
223 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); | ||
224 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); | ||
225 | } | ||
197 | break; | 226 | break; |
198 | } | 227 | } |
199 | nmi_active = -1; | 228 | nmi_active = -1; |
@@ -261,7 +290,7 @@ void enable_timer_nmi_watchdog(void) | |||
261 | 290 | ||
262 | static int nmi_pm_active; /* nmi_active before suspend */ | 291 | static int nmi_pm_active; /* nmi_active before suspend */ |
263 | 292 | ||
264 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) | 293 | static int lapic_nmi_suspend(struct sys_device *dev, u32 state) |
265 | { | 294 | { |
266 | nmi_pm_active = nmi_active; | 295 | nmi_pm_active = nmi_active; |
267 | disable_lapic_nmi_watchdog(); | 296 | disable_lapic_nmi_watchdog(); |
@@ -308,22 +337,27 @@ late_initcall(init_lapic_nmi_sysfs); | |||
308 | * Original code written by Keith Owens. | 337 | * Original code written by Keith Owens. |
309 | */ | 338 | */ |
310 | 339 | ||
340 | static void clear_msr_range(unsigned int base, unsigned int n) | ||
341 | { | ||
342 | unsigned int i; | ||
343 | |||
344 | for(i = 0; i < n; ++i) | ||
345 | wrmsr(base+i, 0, 0); | ||
346 | } | ||
347 | |||
311 | static void setup_k7_watchdog(void) | 348 | static void setup_k7_watchdog(void) |
312 | { | 349 | { |
313 | int i; | 350 | int i; |
314 | unsigned int evntsel; | 351 | unsigned int evntsel; |
315 | 352 | ||
316 | /* No check, so can start with slow frequency */ | ||
317 | nmi_hz = 1; | ||
318 | |||
319 | /* XXX should check these in EFER */ | ||
320 | |||
321 | nmi_perfctr_msr = MSR_K7_PERFCTR0; | 353 | nmi_perfctr_msr = MSR_K7_PERFCTR0; |
322 | 354 | ||
323 | for(i = 0; i < 4; ++i) { | 355 | for(i = 0; i < 4; ++i) { |
324 | /* Simulator may not support it */ | 356 | /* Simulator may not support it */ |
325 | if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) | 357 | if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) { |
358 | nmi_perfctr_msr = 0; | ||
326 | return; | 359 | return; |
360 | } | ||
327 | wrmsrl(MSR_K7_PERFCTR0+i, 0UL); | 361 | wrmsrl(MSR_K7_PERFCTR0+i, 0UL); |
328 | } | 362 | } |
329 | 363 | ||
@@ -333,12 +367,54 @@ static void setup_k7_watchdog(void) | |||
333 | | K7_NMI_EVENT; | 367 | | K7_NMI_EVENT; |
334 | 368 | ||
335 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 369 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); |
336 | wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz*1000) / nmi_hz); | 370 | wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1); |
337 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 371 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
338 | evntsel |= K7_EVNTSEL_ENABLE; | 372 | evntsel |= K7_EVNTSEL_ENABLE; |
339 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 373 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); |
340 | } | 374 | } |
341 | 375 | ||
376 | |||
377 | static int setup_p4_watchdog(void) | ||
378 | { | ||
379 | unsigned int misc_enable, dummy; | ||
380 | |||
381 | rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); | ||
382 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
383 | return 0; | ||
384 | |||
385 | nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; | ||
386 | nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; | ||
387 | #ifdef CONFIG_SMP | ||
388 | if (smp_num_siblings == 2) | ||
389 | nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; | ||
390 | #endif | ||
391 | |||
392 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) | ||
393 | clear_msr_range(0x3F1, 2); | ||
394 | /* MSR 0x3F0 seems to have a default value of 0xFC00, but current | ||
395 | docs doesn't fully define it, so leave it alone for now. */ | ||
396 | if (boot_cpu_data.x86_model >= 0x3) { | ||
397 | /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ | ||
398 | clear_msr_range(0x3A0, 26); | ||
399 | clear_msr_range(0x3BC, 3); | ||
400 | } else { | ||
401 | clear_msr_range(0x3A0, 31); | ||
402 | } | ||
403 | clear_msr_range(0x3C0, 6); | ||
404 | clear_msr_range(0x3C8, 6); | ||
405 | clear_msr_range(0x3E0, 2); | ||
406 | clear_msr_range(MSR_P4_CCCR0, 18); | ||
407 | clear_msr_range(MSR_P4_PERFCTR0, 18); | ||
408 | |||
409 | wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); | ||
410 | wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); | ||
411 | Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); | ||
412 | wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); | ||
413 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
414 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | ||
415 | return 1; | ||
416 | } | ||
417 | |||
342 | void setup_apic_nmi_watchdog(void) | 418 | void setup_apic_nmi_watchdog(void) |
343 | { | 419 | { |
344 | switch (boot_cpu_data.x86_vendor) { | 420 | switch (boot_cpu_data.x86_vendor) { |
@@ -349,6 +425,13 @@ void setup_apic_nmi_watchdog(void) | |||
349 | return; | 425 | return; |
350 | setup_k7_watchdog(); | 426 | setup_k7_watchdog(); |
351 | break; | 427 | break; |
428 | case X86_VENDOR_INTEL: | ||
429 | if (boot_cpu_data.x86 != 15) | ||
430 | return; | ||
431 | if (!setup_p4_watchdog()) | ||
432 | return; | ||
433 | break; | ||
434 | |||
352 | default: | 435 | default: |
353 | return; | 436 | return; |
354 | } | 437 | } |
@@ -363,56 +446,67 @@ void setup_apic_nmi_watchdog(void) | |||
363 | * | 446 | * |
364 | * as these watchdog NMI IRQs are generated on every CPU, we only | 447 | * as these watchdog NMI IRQs are generated on every CPU, we only |
365 | * have to check the current processor. | 448 | * have to check the current processor. |
366 | * | ||
367 | * since NMIs don't listen to _any_ locks, we have to be extremely | ||
368 | * careful not to rely on unsafe variables. The printk might lock | ||
369 | * up though, so we have to break up any console locks first ... | ||
370 | * [when there will be more tty-related locks, break them up | ||
371 | * here too!] | ||
372 | */ | 449 | */ |
373 | 450 | ||
374 | static unsigned int | 451 | static DEFINE_PER_CPU(unsigned, last_irq_sum); |
375 | last_irq_sums [NR_CPUS], | 452 | static DEFINE_PER_CPU(local_t, alert_counter); |
376 | alert_counter [NR_CPUS]; | 453 | static DEFINE_PER_CPU(int, nmi_touch); |
377 | 454 | ||
378 | void touch_nmi_watchdog (void) | 455 | void touch_nmi_watchdog (void) |
379 | { | 456 | { |
380 | int i; | 457 | int i; |
381 | 458 | ||
382 | /* | 459 | /* |
383 | * Just reset the alert counters, (other CPUs might be | 460 | * Tell other CPUs to reset their alert counters. We cannot |
384 | * spinning on locks we hold): | 461 | * do it ourselves because the alert count increase is not |
462 | * atomic. | ||
385 | */ | 463 | */ |
386 | for (i = 0; i < NR_CPUS; i++) | 464 | for (i = 0; i < NR_CPUS; i++) |
387 | alert_counter[i] = 0; | 465 | per_cpu(nmi_touch, i) = 1; |
388 | } | 466 | } |
389 | 467 | ||
390 | void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) | 468 | void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) |
391 | { | 469 | { |
392 | int sum, cpu; | 470 | int sum; |
471 | int touched = 0; | ||
393 | 472 | ||
394 | cpu = safe_smp_processor_id(); | ||
395 | sum = read_pda(apic_timer_irqs); | 473 | sum = read_pda(apic_timer_irqs); |
396 | if (last_irq_sums[cpu] == sum) { | 474 | if (__get_cpu_var(nmi_touch)) { |
475 | __get_cpu_var(nmi_touch) = 0; | ||
476 | touched = 1; | ||
477 | } | ||
478 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { | ||
397 | /* | 479 | /* |
398 | * Ayiee, looks like this CPU is stuck ... | 480 | * Ayiee, looks like this CPU is stuck ... |
399 | * wait a few IRQs (5 seconds) before doing the oops ... | 481 | * wait a few IRQs (5 seconds) before doing the oops ... |
400 | */ | 482 | */ |
401 | alert_counter[cpu]++; | 483 | local_inc(&__get_cpu_var(alert_counter)); |
402 | if (alert_counter[cpu] == 5*nmi_hz) { | 484 | if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { |
403 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | 485 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) |
404 | == NOTIFY_STOP) { | 486 | == NOTIFY_STOP) { |
405 | alert_counter[cpu] = 0; | 487 | local_set(&__get_cpu_var(alert_counter), 0); |
406 | return; | 488 | return; |
407 | } | 489 | } |
408 | die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs); | 490 | die_nmi("NMI Watchdog detected LOCKUP on CPU%d", regs); |
409 | } | 491 | } |
410 | } else { | 492 | } else { |
411 | last_irq_sums[cpu] = sum; | 493 | __get_cpu_var(last_irq_sum) = sum; |
412 | alert_counter[cpu] = 0; | 494 | local_set(&__get_cpu_var(alert_counter), 0); |
413 | } | 495 | } |
414 | if (nmi_perfctr_msr) | 496 | if (nmi_perfctr_msr) { |
497 | if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { | ||
498 | /* | ||
499 | * P4 quirks: | ||
500 | * - An overflown perfctr will assert its interrupt | ||
501 | * until the OVF flag in its CCCR is cleared. | ||
502 | * - LVTPC is masked on interrupt and must be | ||
503 | * unmasked by the LVTPC handler. | ||
504 | */ | ||
505 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | ||
506 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
507 | } | ||
415 | wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); | 508 | wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); |
509 | } | ||
416 | } | 510 | } |
417 | 511 | ||
418 | static int dummy_nmi_callback(struct pt_regs * regs, int cpu) | 512 | static int dummy_nmi_callback(struct pt_regs * regs, int cpu) |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 17efc40baa32..d00e494c1a39 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <asm/kdebug.h> | 56 | #include <asm/kdebug.h> |
57 | #include <asm/tlbflush.h> | 57 | #include <asm/tlbflush.h> |
58 | #include <asm/proto.h> | 58 | #include <asm/proto.h> |
59 | #include <asm/nmi.h> | ||
59 | 60 | ||
60 | /* Change for real CPU hotplug. Note other files need to be fixed | 61 | /* Change for real CPU hotplug. Note other files need to be fixed |
61 | first too. */ | 62 | first too. */ |
@@ -1030,4 +1031,6 @@ void __cpuinit smp_cpus_done(unsigned int max_cpus) | |||
1030 | 1031 | ||
1031 | detect_siblings(); | 1032 | detect_siblings(); |
1032 | time_init_gtod(); | 1033 | time_init_gtod(); |
1034 | |||
1035 | check_nmi_watchdog(); | ||
1033 | } | 1036 | } |
diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index 21d56b086b9d..d3abfc6a8fd5 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h | |||
@@ -53,5 +53,7 @@ extern void die_nmi(char *str, struct pt_regs *regs); | |||
53 | 53 | ||
54 | extern int panic_on_timeout; | 54 | extern int panic_on_timeout; |
55 | extern int unknown_nmi_panic; | 55 | extern int unknown_nmi_panic; |
56 | |||
57 | extern int check_nmi_watchdog(void); | ||
56 | 58 | ||
57 | #endif /* ASM_NMI_H */ | 59 | #endif /* ASM_NMI_H */ |