Diffstat (limited to 'arch/x86_64/kernel/nmi.c')
-rw-r--r-- | arch/x86_64/kernel/nmi.c | 840
1 file changed, 557 insertions, 283 deletions
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 5baa0c726e97..4d6fb047952e 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -28,71 +28,138 @@
28 | #include <asm/mce.h> | 28 | #include <asm/mce.h> |
29 | #include <asm/intel_arch_perfmon.h> | 29 | #include <asm/intel_arch_perfmon.h> |
30 | 30 | ||
31 | /* | 31 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: |
32 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: | 32 | * evtsel_nmi_owner tracks the ownership of the event selection |
33 | * - it may be reserved by some other driver, or not | 33 | * - different performance counters/ event selection may be reserved for |
34 | * - when not reserved by some other driver, it may be used for | 34 | * different subsystems; this reservation system just tries to coordinate |
35 | * the NMI watchdog, or not | 35 | * things a little |
36 | * | ||
37 | * This is maintained separately from nmi_active because the NMI | ||
38 | * watchdog may also be driven from the I/O APIC timer. | ||
39 | */ | 36 | */ |
40 | static DEFINE_SPINLOCK(lapic_nmi_owner_lock); | 37 | static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); |
41 | static unsigned int lapic_nmi_owner; | 38 | static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); |
42 | #define LAPIC_NMI_WATCHDOG (1<<0) | 39 | |
43 | #define LAPIC_NMI_RESERVED (1<<1) | 40 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's |
41 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) | ||
42 | */ | ||
43 | #define NMI_MAX_COUNTER_BITS 66 | ||
44 | 44 | ||
45 | /* nmi_active: | 45 | /* nmi_active: |
46 | * +1: the lapic NMI watchdog is active, but can be disabled | 46 | * >0: the lapic NMI watchdog is active, but can be disabled |
47 | * 0: the lapic NMI watchdog has not been set up, and cannot | 47 | * <0: the lapic NMI watchdog has not been set up, and cannot |
48 | * be enabled | 48 | * be enabled |
49 | * -1: the lapic NMI watchdog is disabled, but can be enabled | 49 | * 0: the lapic NMI watchdog is disabled, but can be enabled |
50 | */ | 50 | */ |
51 | int nmi_active; /* oprofile uses this */ | 51 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ |
52 | int panic_on_timeout; | 52 | int panic_on_timeout; |
53 | 53 | ||
54 | unsigned int nmi_watchdog = NMI_DEFAULT; | 54 | unsigned int nmi_watchdog = NMI_DEFAULT; |
55 | static unsigned int nmi_hz = HZ; | 55 | static unsigned int nmi_hz = HZ; |
56 | static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ | ||
57 | static unsigned int nmi_p4_cccr_val; | ||
58 | 56 | ||
59 | /* Note that these events don't tick when the CPU idles. This means | 57 | struct nmi_watchdog_ctlblk { |
60 | the frequency varies with CPU load. */ | 58 | int enabled; |
59 | u64 check_bit; | ||
60 | unsigned int cccr_msr; | ||
61 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ | ||
62 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ | ||
63 | }; | ||
64 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
61 | 65 | ||
62 | #define K7_EVNTSEL_ENABLE (1 << 22) | 66 | /* local prototypes */ |
63 | #define K7_EVNTSEL_INT (1 << 20) | 67 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); |
64 | #define K7_EVNTSEL_OS (1 << 17) | ||
65 | #define K7_EVNTSEL_USR (1 << 16) | ||
66 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
67 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
68 | 68 | ||
69 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | 69 | /* converts an msr to an appropriate reservation bit */ |
70 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | 70 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) |
71 | { | ||
72 | /* returns the bit offset of the performance counter register */ | ||
73 | switch (boot_cpu_data.x86_vendor) { | ||
74 | case X86_VENDOR_AMD: | ||
75 | return (msr - MSR_K7_PERFCTR0); | ||
76 | case X86_VENDOR_INTEL: | ||
77 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
78 | return (msr - MSR_ARCH_PERFMON_PERFCTR0); | ||
79 | else | ||
80 | return (msr - MSR_P4_BPU_PERFCTR0); | ||
81 | } | ||
82 | return 0; | ||
83 | } | ||
71 | 84 | ||
72 | #define MSR_P4_MISC_ENABLE 0x1A0 | 85 | /* converts an msr to an appropriate reservation bit */ |
73 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | 86 | static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) |
74 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) | 87 | { |
75 | #define MSR_P4_PERFCTR0 0x300 | 88 | /* returns the bit offset of the event selection register */ |
76 | #define MSR_P4_CCCR0 0x360 | 89 | switch (boot_cpu_data.x86_vendor) { |
77 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | 90 | case X86_VENDOR_AMD: |
78 | #define P4_ESCR_OS (1<<3) | 91 | return (msr - MSR_K7_EVNTSEL0); |
79 | #define P4_ESCR_USR (1<<2) | 92 | case X86_VENDOR_INTEL: |
80 | #define P4_CCCR_OVF_PMI0 (1<<26) | 93 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
81 | #define P4_CCCR_OVF_PMI1 (1<<27) | 94 | return (msr - MSR_ARCH_PERFMON_EVENTSEL0); |
82 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | 95 | else |
83 | #define P4_CCCR_COMPLEMENT (1<<19) | 96 | return (msr - MSR_P4_BSU_ESCR0); |
84 | #define P4_CCCR_COMPARE (1<<18) | 97 | } |
85 | #define P4_CCCR_REQUIRED (3<<16) | 98 | return 0; |
86 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | 99 | } |
87 | #define P4_CCCR_ENABLE (1<<12) | 100 | |
88 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | 101 | /* checks for a bit availability (hack for oprofile) */ |
89 | CRU_ESCR0 (with any non-null event selector) through a complemented | 102 | int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) |
90 | max threshold. [IA32-Vol3, Section 14.9.9] */ | 103 | { |
91 | #define MSR_P4_IQ_COUNTER0 0x30C | 104 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); |
92 | #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) | 105 | |
93 | #define P4_NMI_IQ_CCCR0 \ | 106 | return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); |
94 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ | 107 | } |
95 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) | 108 | |
109 | /* checks an msr for availability */ | ||
110 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
111 | { | ||
112 | unsigned int counter; | ||
113 | |||
114 | counter = nmi_perfctr_msr_to_bit(msr); | ||
115 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
116 | |||
117 | return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); | ||
118 | } | ||
119 | |||
120 | int reserve_perfctr_nmi(unsigned int msr) | ||
121 | { | ||
122 | unsigned int counter; | ||
123 | |||
124 | counter = nmi_perfctr_msr_to_bit(msr); | ||
125 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
126 | |||
127 | if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner))) | ||
128 | return 1; | ||
129 | return 0; | ||
130 | } | ||
131 | |||
132 | void release_perfctr_nmi(unsigned int msr) | ||
133 | { | ||
134 | unsigned int counter; | ||
135 | |||
136 | counter = nmi_perfctr_msr_to_bit(msr); | ||
137 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
138 | |||
139 | clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner)); | ||
140 | } | ||
141 | |||
142 | int reserve_evntsel_nmi(unsigned int msr) | ||
143 | { | ||
144 | unsigned int counter; | ||
145 | |||
146 | counter = nmi_evntsel_msr_to_bit(msr); | ||
147 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
148 | |||
149 | if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner))) | ||
150 | return 1; | ||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | void release_evntsel_nmi(unsigned int msr) | ||
155 | { | ||
156 | unsigned int counter; | ||
157 | |||
158 | counter = nmi_evntsel_msr_to_bit(msr); | ||
159 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
160 | |||
161 | clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)); | ||
162 | } | ||
96 | 163 | ||
97 | static __cpuinit inline int nmi_known_cpu(void) | 164 | static __cpuinit inline int nmi_known_cpu(void) |
98 | { | 165 | { |
@@ -109,7 +176,7 @@ static __cpuinit inline int nmi_known_cpu(void)
109 | } | 176 | } |
110 | 177 | ||
111 | /* Run after command line and cpu_init init, but before all other checks */ | 178 | /* Run after command line and cpu_init init, but before all other checks */ |
112 | void __cpuinit nmi_watchdog_default(void) | 179 | void nmi_watchdog_default(void) |
113 | { | 180 | { |
114 | if (nmi_watchdog != NMI_DEFAULT) | 181 | if (nmi_watchdog != NMI_DEFAULT) |
115 | return; | 182 | return; |
@@ -145,6 +212,12 @@ int __init check_nmi_watchdog (void)
145 | int *counts; | 212 | int *counts; |
146 | int cpu; | 213 | int cpu; |
147 | 214 | ||
215 | if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) | ||
216 | return 0; | ||
217 | |||
218 | if (!atomic_read(&nmi_active)) | ||
219 | return 0; | ||
220 | |||
148 | counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); | 221 | counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
149 | if (!counts) | 222 | if (!counts) |
150 | return -1; | 223 | return -1; |
@@ -162,26 +235,43 @@ int __init check_nmi_watchdog (void)
162 | mdelay((10*1000)/nmi_hz); // wait 10 ticks | 235 | mdelay((10*1000)/nmi_hz); // wait 10 ticks |
163 | 236 | ||
164 | for_each_online_cpu(cpu) { | 237 | for_each_online_cpu(cpu) { |
238 | if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) | ||
239 | continue; | ||
165 | if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { | 240 | if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { |
166 | endflag = 1; | ||
167 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | 241 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", |
168 | cpu, | 242 | cpu, |
169 | counts[cpu], | 243 | counts[cpu], |
170 | cpu_pda(cpu)->__nmi_count); | 244 | cpu_pda(cpu)->__nmi_count); |
171 | nmi_active = 0; | 245 | per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; |
172 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; | 246 | atomic_dec(&nmi_active); |
173 | nmi_perfctr_msr = 0; | ||
174 | kfree(counts); | ||
175 | return -1; | ||
176 | } | 247 | } |
177 | } | 248 | } |
249 | if (!atomic_read(&nmi_active)) { | ||
250 | kfree(counts); | ||
251 | atomic_set(&nmi_active, -1); | ||
252 | return -1; | ||
253 | } | ||
178 | endflag = 1; | 254 | endflag = 1; |
179 | printk("OK.\n"); | 255 | printk("OK.\n"); |
180 | 256 | ||
181 | /* now that we know it works we can reduce NMI frequency to | 257 | /* now that we know it works we can reduce NMI frequency to |
182 | something more reasonable; makes a difference in some configs */ | 258 | something more reasonable; makes a difference in some configs */ |
183 | if (nmi_watchdog == NMI_LOCAL_APIC) | 259 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
260 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
261 | |||
184 | nmi_hz = 1; | 262 | nmi_hz = 1; |
263 | /* | ||
264 | * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter | ||
265 | * are writable, with higher bits sign extending from bit 31. | ||
266 | * So, we can only program the counter with 31 bit values and | ||
267 | * 32nd bit should be 1, for 33.. to be 1. | ||
268 | * Find the appropriate nmi_hz | ||
269 | */ | ||
270 | if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 && | ||
271 | ((u64)cpu_khz * 1000) > 0x7fffffffULL) { | ||
272 | nmi_hz = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1; | ||
273 | } | ||
274 | } | ||
185 | 275 | ||
186 | kfree(counts); | 276 | kfree(counts); |
187 | return 0; | 277 | return 0; |
@@ -201,91 +291,65 @@ int __init setup_nmi_watchdog(char *str)
201 | 291 | ||
202 | get_option(&str, &nmi); | 292 | get_option(&str, &nmi); |
203 | 293 | ||
204 | if (nmi >= NMI_INVALID) | 294 | if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) |
205 | return 0; | 295 | return 0; |
296 | |||
297 | if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0)) | ||
298 | return 0; /* no lapic support */ | ||
206 | nmi_watchdog = nmi; | 299 | nmi_watchdog = nmi; |
207 | return 1; | 300 | return 1; |
208 | } | 301 | } |
209 | 302 | ||
210 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 303 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
211 | 304 | ||
212 | static void disable_intel_arch_watchdog(void); | ||
213 | |||
214 | static void disable_lapic_nmi_watchdog(void) | 305 | static void disable_lapic_nmi_watchdog(void) |
215 | { | 306 | { |
216 | if (nmi_active <= 0) | 307 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); |
308 | |||
309 | if (atomic_read(&nmi_active) <= 0) | ||
217 | return; | 310 | return; |
218 | switch (boot_cpu_data.x86_vendor) { | ||
219 | case X86_VENDOR_AMD: | ||
220 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); | ||
221 | break; | ||
222 | case X86_VENDOR_INTEL: | ||
223 | if (boot_cpu_data.x86 == 15) { | ||
224 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); | ||
225 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); | ||
226 | } else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
227 | disable_intel_arch_watchdog(); | ||
228 | } | ||
229 | break; | ||
230 | } | ||
231 | nmi_active = -1; | ||
232 | /* tell do_nmi() and others that we're not active any more */ | ||
233 | nmi_watchdog = 0; | ||
234 | } | ||
235 | 311 | ||
236 | static void enable_lapic_nmi_watchdog(void) | 312 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); |
237 | { | 313 | |
238 | if (nmi_active < 0) { | 314 | BUG_ON(atomic_read(&nmi_active) != 0); |
239 | nmi_watchdog = NMI_LOCAL_APIC; | ||
240 | touch_nmi_watchdog(); | ||
241 | setup_apic_nmi_watchdog(); | ||
242 | } | ||
243 | } | 315 | } |
244 | 316 | ||
245 | int reserve_lapic_nmi(void) | 317 | static void enable_lapic_nmi_watchdog(void) |
246 | { | 318 | { |
247 | unsigned int old_owner; | 319 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); |
248 | 320 | ||
249 | spin_lock(&lapic_nmi_owner_lock); | 321 | /* are we already enabled */ |
250 | old_owner = lapic_nmi_owner; | 322 | if (atomic_read(&nmi_active) != 0) |
251 | lapic_nmi_owner |= LAPIC_NMI_RESERVED; | 323 | return; |
252 | spin_unlock(&lapic_nmi_owner_lock); | ||
253 | if (old_owner & LAPIC_NMI_RESERVED) | ||
254 | return -EBUSY; | ||
255 | if (old_owner & LAPIC_NMI_WATCHDOG) | ||
256 | disable_lapic_nmi_watchdog(); | ||
257 | return 0; | ||
258 | } | ||
259 | 324 | ||
260 | void release_lapic_nmi(void) | 325 | /* are we lapic aware */ |
261 | { | 326 | if (nmi_known_cpu() <= 0) |
262 | unsigned int new_owner; | 327 | return; |
263 | 328 | ||
264 | spin_lock(&lapic_nmi_owner_lock); | 329 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); |
265 | new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; | 330 | touch_nmi_watchdog(); |
266 | lapic_nmi_owner = new_owner; | ||
267 | spin_unlock(&lapic_nmi_owner_lock); | ||
268 | if (new_owner & LAPIC_NMI_WATCHDOG) | ||
269 | enable_lapic_nmi_watchdog(); | ||
270 | } | 331 | } |
271 | 332 | ||
272 | void disable_timer_nmi_watchdog(void) | 333 | void disable_timer_nmi_watchdog(void) |
273 | { | 334 | { |
274 | if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) | 335 | BUG_ON(nmi_watchdog != NMI_IO_APIC); |
336 | |||
337 | if (atomic_read(&nmi_active) <= 0) | ||
275 | return; | 338 | return; |
276 | 339 | ||
277 | disable_irq(0); | 340 | disable_irq(0); |
278 | unset_nmi_callback(); | 341 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); |
279 | nmi_active = -1; | 342 | |
280 | nmi_watchdog = NMI_NONE; | 343 | BUG_ON(atomic_read(&nmi_active) != 0); |
281 | } | 344 | } |
282 | 345 | ||
283 | void enable_timer_nmi_watchdog(void) | 346 | void enable_timer_nmi_watchdog(void) |
284 | { | 347 | { |
285 | if (nmi_active < 0) { | 348 | BUG_ON(nmi_watchdog != NMI_IO_APIC); |
286 | nmi_watchdog = NMI_IO_APIC; | 349 | |
350 | if (atomic_read(&nmi_active) == 0) { | ||
287 | touch_nmi_watchdog(); | 351 | touch_nmi_watchdog(); |
288 | nmi_active = 1; | 352 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); |
289 | enable_irq(0); | 353 | enable_irq(0); |
290 | } | 354 | } |
291 | } | 355 | } |
@@ -296,15 +360,20 @@ static int nmi_pm_active; /* nmi_active before suspend */
296 | 360 | ||
297 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) | 361 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) |
298 | { | 362 | { |
299 | nmi_pm_active = nmi_active; | 363 | /* only CPU0 goes here, other CPUs should be offline */ |
300 | disable_lapic_nmi_watchdog(); | 364 | nmi_pm_active = atomic_read(&nmi_active); |
365 | stop_apic_nmi_watchdog(NULL); | ||
366 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
301 | return 0; | 367 | return 0; |
302 | } | 368 | } |
303 | 369 | ||
304 | static int lapic_nmi_resume(struct sys_device *dev) | 370 | static int lapic_nmi_resume(struct sys_device *dev) |
305 | { | 371 | { |
306 | if (nmi_pm_active > 0) | 372 | /* only CPU0 goes here, other CPUs should be offline */ |
307 | enable_lapic_nmi_watchdog(); | 373 | if (nmi_pm_active > 0) { |
374 | setup_apic_nmi_watchdog(NULL); | ||
375 | touch_nmi_watchdog(); | ||
376 | } | ||
308 | return 0; | 377 | return 0; |
309 | } | 378 | } |
310 | 379 | ||
@@ -323,7 +392,13 @@ static int __init init_lapic_nmi_sysfs(void)
323 | { | 392 | { |
324 | int error; | 393 | int error; |
325 | 394 | ||
326 | if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) | 395 | /* should really be a BUG_ON but b/c this is an |
396 | * init call, it just doesn't work. -dcz | ||
397 | */ | ||
398 | if (nmi_watchdog != NMI_LOCAL_APIC) | ||
399 | return 0; | ||
400 | |||
401 | if ( atomic_read(&nmi_active) < 0 ) | ||
327 | return 0; | 402 | return 0; |
328 | 403 | ||
329 | error = sysdev_class_register(&nmi_sysclass); | 404 | error = sysdev_class_register(&nmi_sysclass); |
@@ -341,74 +416,209 @@ late_initcall(init_lapic_nmi_sysfs);
341 | * Original code written by Keith Owens. | 416 | * Original code written by Keith Owens. |
342 | */ | 417 | */ |
343 | 418 | ||
344 | static void clear_msr_range(unsigned int base, unsigned int n) | 419 | /* Note that these events don't tick when the CPU idles. This means |
345 | { | 420 | the frequency varies with CPU load. */ |
346 | unsigned int i; | ||
347 | 421 | ||
348 | for(i = 0; i < n; ++i) | 422 | #define K7_EVNTSEL_ENABLE (1 << 22) |
349 | wrmsr(base+i, 0, 0); | 423 | #define K7_EVNTSEL_INT (1 << 20) |
350 | } | 424 | #define K7_EVNTSEL_OS (1 << 17) |
425 | #define K7_EVNTSEL_USR (1 << 16) | ||
426 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
427 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
351 | 428 | ||
352 | static void setup_k7_watchdog(void) | 429 | static int setup_k7_watchdog(void) |
353 | { | 430 | { |
354 | int i; | 431 | unsigned int perfctr_msr, evntsel_msr; |
355 | unsigned int evntsel; | 432 | unsigned int evntsel; |
433 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
356 | 434 | ||
357 | nmi_perfctr_msr = MSR_K7_PERFCTR0; | 435 | perfctr_msr = MSR_K7_PERFCTR0; |
436 | evntsel_msr = MSR_K7_EVNTSEL0; | ||
437 | if (!reserve_perfctr_nmi(perfctr_msr)) | ||
438 | goto fail; | ||
358 | 439 | ||
359 | for(i = 0; i < 4; ++i) { | 440 | if (!reserve_evntsel_nmi(evntsel_msr)) |
360 | /* Simulator may not support it */ | 441 | goto fail1; |
361 | if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL)) { | 442 | |
362 | nmi_perfctr_msr = 0; | 443 | /* Simulator may not support it */ |
363 | return; | 444 | if (checking_wrmsrl(evntsel_msr, 0UL)) |
364 | } | 445 | goto fail2; |
365 | wrmsrl(MSR_K7_PERFCTR0+i, 0UL); | 446 | wrmsrl(perfctr_msr, 0UL); |
366 | } | ||
367 | 447 | ||
368 | evntsel = K7_EVNTSEL_INT | 448 | evntsel = K7_EVNTSEL_INT |
369 | | K7_EVNTSEL_OS | 449 | | K7_EVNTSEL_OS |
370 | | K7_EVNTSEL_USR | 450 | | K7_EVNTSEL_USR |
371 | | K7_NMI_EVENT; | 451 | | K7_NMI_EVENT; |
372 | 452 | ||
373 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 453 | /* setup the timer */ |
374 | wrmsrl(MSR_K7_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz)); | 454 | wrmsr(evntsel_msr, evntsel, 0); |
455 | wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
375 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 456 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
376 | evntsel |= K7_EVNTSEL_ENABLE; | 457 | evntsel |= K7_EVNTSEL_ENABLE; |
377 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 458 | wrmsr(evntsel_msr, evntsel, 0); |
459 | |||
460 | wd->perfctr_msr = perfctr_msr; | ||
461 | wd->evntsel_msr = evntsel_msr; | ||
462 | wd->cccr_msr = 0; //unused | ||
463 | wd->check_bit = 1ULL<<63; | ||
464 | return 1; | ||
465 | fail2: | ||
466 | release_evntsel_nmi(evntsel_msr); | ||
467 | fail1: | ||
468 | release_perfctr_nmi(perfctr_msr); | ||
469 | fail: | ||
470 | return 0; | ||
378 | } | 471 | } |
379 | 472 | ||
380 | static void disable_intel_arch_watchdog(void) | 473 | static void stop_k7_watchdog(void) |
381 | { | 474 | { |
382 | unsigned ebx; | 475 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
383 | 476 | ||
384 | /* | 477 | wrmsr(wd->evntsel_msr, 0, 0); |
385 | * Check whether the Architectural PerfMon supports | 478 | |
386 | * Unhalted Core Cycles Event or not. | 479 | release_evntsel_nmi(wd->evntsel_msr); |
387 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | 480 | release_perfctr_nmi(wd->perfctr_msr); |
481 | } | ||
482 | |||
483 | /* Note that these events don't tick when the CPU idles. This means | ||
484 | the frequency varies with CPU load. */ | ||
485 | |||
486 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | ||
487 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
488 | #define P4_ESCR_OS (1<<3) | ||
489 | #define P4_ESCR_USR (1<<2) | ||
490 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
491 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
492 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
493 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
494 | #define P4_CCCR_COMPARE (1<<18) | ||
495 | #define P4_CCCR_REQUIRED (3<<16) | ||
496 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
497 | #define P4_CCCR_ENABLE (1<<12) | ||
498 | #define P4_CCCR_OVF (1<<31) | ||
499 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
500 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
501 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
502 | |||
503 | static int setup_p4_watchdog(void) | ||
504 | { | ||
505 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | ||
506 | unsigned int evntsel, cccr_val; | ||
507 | unsigned int misc_enable, dummy; | ||
508 | unsigned int ht_num; | ||
509 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
510 | |||
511 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); | ||
512 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
513 | return 0; | ||
514 | |||
515 | #ifdef CONFIG_SMP | ||
516 | /* detect which hyperthread we are on */ | ||
517 | if (smp_num_siblings == 2) { | ||
518 | unsigned int ebx, apicid; | ||
519 | |||
520 | ebx = cpuid_ebx(1); | ||
521 | apicid = (ebx >> 24) & 0xff; | ||
522 | ht_num = apicid & 1; | ||
523 | } else | ||
524 | #endif | ||
525 | ht_num = 0; | ||
526 | |||
527 | /* performance counters are shared resources | ||
528 | * assign each hyperthread its own set | ||
529 | * (re-use the ESCR0 register, seems safe | ||
530 | * and keeps the cccr_val the same) | ||
388 | */ | 531 | */ |
389 | ebx = cpuid_ebx(10); | 532 | if (!ht_num) { |
390 | if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | 533 | /* logical cpu 0 */ |
391 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); | 534 | perfctr_msr = MSR_P4_IQ_PERFCTR0; |
535 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
536 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
537 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
538 | } else { | ||
539 | /* logical cpu 1 */ | ||
540 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
541 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
542 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
543 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | ||
544 | } | ||
545 | |||
546 | if (!reserve_perfctr_nmi(perfctr_msr)) | ||
547 | goto fail; | ||
548 | |||
549 | if (!reserve_evntsel_nmi(evntsel_msr)) | ||
550 | goto fail1; | ||
551 | |||
552 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | ||
553 | | P4_ESCR_OS | ||
554 | | P4_ESCR_USR; | ||
555 | |||
556 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
557 | | P4_CCCR_COMPLEMENT | ||
558 | | P4_CCCR_COMPARE | ||
559 | | P4_CCCR_REQUIRED; | ||
560 | |||
561 | wrmsr(evntsel_msr, evntsel, 0); | ||
562 | wrmsr(cccr_msr, cccr_val, 0); | ||
563 | wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
564 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
565 | cccr_val |= P4_CCCR_ENABLE; | ||
566 | wrmsr(cccr_msr, cccr_val, 0); | ||
567 | |||
568 | wd->perfctr_msr = perfctr_msr; | ||
569 | wd->evntsel_msr = evntsel_msr; | ||
570 | wd->cccr_msr = cccr_msr; | ||
571 | wd->check_bit = 1ULL<<39; | ||
572 | return 1; | ||
573 | fail1: | ||
574 | release_perfctr_nmi(perfctr_msr); | ||
575 | fail: | ||
576 | return 0; | ||
577 | } | ||
578 | |||
579 | static void stop_p4_watchdog(void) | ||
580 | { | ||
581 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
582 | |||
583 | wrmsr(wd->cccr_msr, 0, 0); | ||
584 | wrmsr(wd->evntsel_msr, 0, 0); | ||
585 | |||
586 | release_evntsel_nmi(wd->evntsel_msr); | ||
587 | release_perfctr_nmi(wd->perfctr_msr); | ||
392 | } | 588 | } |
393 | 589 | ||
590 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
591 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
592 | |||
394 | static int setup_intel_arch_watchdog(void) | 593 | static int setup_intel_arch_watchdog(void) |
395 | { | 594 | { |
595 | unsigned int ebx; | ||
596 | union cpuid10_eax eax; | ||
597 | unsigned int unused; | ||
598 | unsigned int perfctr_msr, evntsel_msr; | ||
396 | unsigned int evntsel; | 599 | unsigned int evntsel; |
397 | unsigned ebx; | 600 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
398 | 601 | ||
399 | /* | 602 | /* |
400 | * Check whether the Architectural PerfMon supports | 603 | * Check whether the Architectural PerfMon supports |
401 | * Unhalted Core Cycles Event or not. | 604 | * Unhalted Core Cycles Event or not. |
402 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | 605 | * NOTE: Corresponding bit = 0 in ebx indicates event present. |
403 | */ | 606 | */ |
404 | ebx = cpuid_ebx(10); | 607 | cpuid(10, &(eax.full), &ebx, &unused, &unused); |
405 | if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | 608 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || |
406 | return 0; | 609 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) |
610 | goto fail; | ||
611 | |||
612 | perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | ||
613 | evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0; | ||
407 | 614 | ||
408 | nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | 615 | if (!reserve_perfctr_nmi(perfctr_msr)) |
616 | goto fail; | ||
409 | 617 | ||
410 | clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); | 618 | if (!reserve_evntsel_nmi(evntsel_msr)) |
411 | clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); | 619 | goto fail1; |
620 | |||
621 | wrmsrl(perfctr_msr, 0UL); | ||
412 | 622 | ||
413 | evntsel = ARCH_PERFMON_EVENTSEL_INT | 623 | evntsel = ARCH_PERFMON_EVENTSEL_INT |
414 | | ARCH_PERFMON_EVENTSEL_OS | 624 | | ARCH_PERFMON_EVENTSEL_OS |
@@ -416,84 +626,122 @@ static int setup_intel_arch_watchdog(void)
416 | | ARCH_PERFMON_NMI_EVENT_SEL | 626 | | ARCH_PERFMON_NMI_EVENT_SEL |
417 | | ARCH_PERFMON_NMI_EVENT_UMASK; | 627 | | ARCH_PERFMON_NMI_EVENT_UMASK; |
418 | 628 | ||
419 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | 629 | /* setup the timer */ |
420 | wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz)); | 630 | wrmsr(evntsel_msr, evntsel, 0); |
631 | wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
632 | |||
421 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 633 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
422 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 634 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
423 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | 635 | wrmsr(evntsel_msr, evntsel, 0); |
636 | |||
637 | wd->perfctr_msr = perfctr_msr; | ||
638 | wd->evntsel_msr = evntsel_msr; | ||
639 | wd->cccr_msr = 0; //unused | ||
640 | wd->check_bit = 1ULL << (eax.split.bit_width - 1); | ||
424 | return 1; | 641 | return 1; |
642 | fail1: | ||
643 | release_perfctr_nmi(perfctr_msr); | ||
644 | fail: | ||
645 | return 0; | ||
425 | } | 646 | } |
426 | 647 | ||
427 | 648 | static void stop_intel_arch_watchdog(void) | |
428 | static int setup_p4_watchdog(void) | ||
429 | { | 649 | { |
430 | unsigned int misc_enable, dummy; | 650 | unsigned int ebx; |
651 | union cpuid10_eax eax; | ||
652 | unsigned int unused; | ||
653 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
431 | 654 | ||
432 | rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); | 655 | /* |
433 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | 656 | * Check whether the Architectural PerfMon supports |
434 | return 0; | 657 | * Unhalted Core Cycles Event or not. |
658 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
659 | */ | ||
660 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
661 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
662 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
663 | return; | ||
435 | 664 | ||
436 | nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; | 665 | wrmsr(wd->evntsel_msr, 0, 0); |
437 | nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; | ||
438 | #ifdef CONFIG_SMP | ||
439 | if (smp_num_siblings == 2) | ||
440 | nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; | ||
441 | #endif | ||
442 | 666 | ||
443 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) | 667 | release_evntsel_nmi(wd->evntsel_msr); |
444 | clear_msr_range(0x3F1, 2); | 668 | release_perfctr_nmi(wd->perfctr_msr); |
445 | /* MSR 0x3F0 seems to have a default value of 0xFC00, but current | ||
446 | docs doesn't fully define it, so leave it alone for now. */ | ||
447 | if (boot_cpu_data.x86_model >= 0x3) { | ||
448 | /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ | ||
449 | clear_msr_range(0x3A0, 26); | ||
450 | clear_msr_range(0x3BC, 3); | ||
451 | } else { | ||
452 | clear_msr_range(0x3A0, 31); | ||
453 | } | ||
454 | clear_msr_range(0x3C0, 6); | ||
455 | clear_msr_range(0x3C8, 6); | ||
456 | clear_msr_range(0x3E0, 2); | ||
457 | clear_msr_range(MSR_P4_CCCR0, 18); | ||
458 | clear_msr_range(MSR_P4_PERFCTR0, 18); | ||
459 | |||
460 | wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); | ||
461 | wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); | ||
462 | Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz * 1000UL / nmi_hz)); | ||
463 | wrmsrl(MSR_P4_IQ_COUNTER0, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
464 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
465 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | ||
466 | return 1; | ||
467 | } | 669 | } |
468 | 670 | ||
469 | void setup_apic_nmi_watchdog(void) | 671 | void setup_apic_nmi_watchdog(void *unused) |
470 | { | 672 | { |
471 | switch (boot_cpu_data.x86_vendor) { | 673 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
472 | case X86_VENDOR_AMD: | 674 | |
473 | if (boot_cpu_data.x86 != 15) | 675 | /* only support LOCAL and IO APICs for now */ |
474 | return; | 676 | if ((nmi_watchdog != NMI_LOCAL_APIC) && |
475 | if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) | 677 | (nmi_watchdog != NMI_IO_APIC)) |
476 | return; | 678 | return; |
477 | setup_k7_watchdog(); | 679 | |
478 | break; | 680 | if (wd->enabled == 1) |
479 | case X86_VENDOR_INTEL: | 681 | return; |
480 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 682 | |
481 | if (!setup_intel_arch_watchdog()) | 683 | /* cheap hack to support suspend/resume */ |
684 | /* if cpu0 is not active neither should the other cpus */ | ||
685 | if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) | ||
686 | return; | ||
687 | |||
688 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
689 | switch (boot_cpu_data.x86_vendor) { | ||
690 | case X86_VENDOR_AMD: | ||
691 | if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) | ||
482 | return; | 692 | return; |
483 | } else if (boot_cpu_data.x86 == 15) { | 693 | if (!setup_k7_watchdog()) |
694 | return; | ||
695 | break; | ||
696 | case X86_VENDOR_INTEL: | ||
697 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
698 | if (!setup_intel_arch_watchdog()) | ||
699 | return; | ||
700 | break; | ||
701 | } | ||
484 | if (!setup_p4_watchdog()) | 702 | if (!setup_p4_watchdog()) |
485 | return; | 703 | return; |
486 | } else { | 704 | break; |
705 | default: | ||
487 | return; | 706 | return; |
488 | } | 707 | } |
708 | } | ||
709 | wd->enabled = 1; | ||
710 | atomic_inc(&nmi_active); | ||
711 | } | ||
712 | |||
713 | void stop_apic_nmi_watchdog(void *unused) | ||
714 | { | ||
715 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
489 | 716 | ||
490 | break; | 717 | /* only support LOCAL and IO APICs for now */ |
718 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
719 | (nmi_watchdog != NMI_IO_APIC)) | ||
720 | return; | ||
491 | 721 | ||
492 | default: | 722 | if (wd->enabled == 0) |
493 | return; | 723 | return; |
724 | |||
725 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
726 | switch (boot_cpu_data.x86_vendor) { | ||
727 | case X86_VENDOR_AMD: | ||
728 | if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) | ||
729 | return; | ||
730 | stop_k7_watchdog(); | ||
731 | break; | ||
732 | case X86_VENDOR_INTEL: | ||
733 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
734 | stop_intel_arch_watchdog(); | ||
735 | break; | ||
736 | } | ||
737 | stop_p4_watchdog(); | ||
738 | break; | ||
739 | default: | ||
740 | return; | ||
741 | } | ||
494 | } | 742 | } |
495 | lapic_nmi_owner = LAPIC_NMI_WATCHDOG; | 743 | wd->enabled = 0; |
496 | nmi_active = 1; | 744 | atomic_dec(&nmi_active); |
497 | } | 745 | } |
498 | 746 | ||
499 | /* | 747 | /* |
@@ -526,93 +774,109 @@ void touch_nmi_watchdog (void)
526 | touch_softlockup_watchdog(); | 774 | touch_softlockup_watchdog(); |
527 | } | 775 | } |
528 | 776 | ||
529 | void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | 777 | int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) |
530 | { | 778 | { |
531 | int sum; | 779 | int sum; |
532 | int touched = 0; | 780 | int touched = 0; |
781 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
782 | u64 dummy; | ||
783 | int rc=0; | ||
784 | |||
785 | /* check for other users first */ | ||
786 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | ||
787 | == NOTIFY_STOP) { | ||
788 | rc = 1; | ||
789 | touched = 1; | ||
790 | } | ||
533 | 791 | ||
534 | sum = read_pda(apic_timer_irqs); | 792 | sum = read_pda(apic_timer_irqs); |
535 | if (__get_cpu_var(nmi_touch)) { | 793 | if (__get_cpu_var(nmi_touch)) { |
536 | __get_cpu_var(nmi_touch) = 0; | 794 | __get_cpu_var(nmi_touch) = 0; |
537 | touched = 1; | 795 | touched = 1; |
538 | } | 796 | } |
797 | |||
539 | #ifdef CONFIG_X86_MCE | 798 | #ifdef CONFIG_X86_MCE |
540 | /* Could check oops_in_progress here too, but it's safer | 799 | /* Could check oops_in_progress here too, but it's safer |
541 | not too */ | 800 | not too */ |
542 | if (atomic_read(&mce_entry) > 0) | 801 | if (atomic_read(&mce_entry) > 0) |
543 | touched = 1; | 802 | touched = 1; |
544 | #endif | 803 | #endif |
804 | /* if the apic timer isn't firing, this cpu isn't doing much */ | ||
545 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { | 805 | if (!touched && __get_cpu_var(last_irq_sum) == sum) { |
546 | /* | 806 | /* |
547 | * Ayiee, looks like this CPU is stuck ... | 807 | * Ayiee, looks like this CPU is stuck ... |
548 | * wait a few IRQs (5 seconds) before doing the oops ... | 808 | * wait a few IRQs (5 seconds) before doing the oops ... |
549 | */ | 809 | */ |
550 | local_inc(&__get_cpu_var(alert_counter)); | 810 | local_inc(&__get_cpu_var(alert_counter)); |
551 | if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { | 811 | if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) |
552 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | 812 | die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs, |
553 | == NOTIFY_STOP) { | 813 | panic_on_timeout); |
554 | local_set(&__get_cpu_var(alert_counter), 0); | ||
555 | return; | ||
556 | } | ||
557 | die_nmi("NMI Watchdog detected LOCKUP on CPU %d\n", regs); | ||
558 | } | ||
559 | } else { | 814 | } else { |
560 | __get_cpu_var(last_irq_sum) = sum; | 815 | __get_cpu_var(last_irq_sum) = sum; |
561 | local_set(&__get_cpu_var(alert_counter), 0); | 816 | local_set(&__get_cpu_var(alert_counter), 0); |
562 | } | 817 | } |
563 | if (nmi_perfctr_msr) { | 818 | |
564 | if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { | 819 | /* see if the nmi watchdog went off */ |
565 | /* | 820 | if (wd->enabled) { |
566 | * P4 quirks: | 821 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
567 | * - An overflown perfctr will assert its interrupt | 822 | rdmsrl(wd->perfctr_msr, dummy); |
568 | * until the OVF flag in its CCCR is cleared. | 823 | if (dummy & wd->check_bit){ |
569 | * - LVTPC is masked on interrupt and must be | 824 | /* this wasn't a watchdog timer interrupt */ |
570 | * unmasked by the LVTPC handler. | 825 | goto done; |
571 | */ | 826 | } |
572 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 827 | |
573 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 828 | /* only Intel uses the cccr msr */ |
574 | } else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | 829 | if (wd->cccr_msr != 0) { |
575 | /* | 830 | /* |
576 | * For Intel based architectural perfmon | 831 | * P4 quirks: |
577 | * - LVTPC is masked on interrupt and must be | 832 | * - An overflown perfctr will assert its interrupt |
578 | * unmasked by the LVTPC handler. | 833 | * until the OVF flag in its CCCR is cleared. |
834 | * - LVTPC is masked on interrupt and must be | ||
835 | * unmasked by the LVTPC handler. | ||
836 | */ | ||
837 | rdmsrl(wd->cccr_msr, dummy); | ||
838 | dummy &= ~P4_CCCR_OVF; | ||
839 | wrmsrl(wd->cccr_msr, dummy); | ||
840 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
841 | } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
842 | /* | ||
843 | * ArchPerfmon/Core Duo needs to re-unmask | ||
844 | * the apic vector | ||
845 | */ | ||
846 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
847 | } | ||
848 | /* start the cycle over again */ | ||
849 | wrmsrl(wd->perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
850 | rc = 1; | ||
851 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
852 | /* don't know how to accurately check for this. | ||
853 | * just assume it was a watchdog timer interrupt | ||
854 | * This matches the old behaviour. | ||
579 | */ | 855 | */ |
580 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 856 | rc = 1; |
581 | } | 857 | } else |
582 | wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | 858 | printk(KERN_WARNING "Unknown enabled NMI hardware?!\n"); |
583 | } | 859 | } |
860 | done: | ||
861 | return rc; | ||
584 | } | 862 | } |
585 | 863 | ||
586 | static __kprobes int dummy_nmi_callback(struct pt_regs * regs, int cpu) | ||
587 | { | ||
588 | return 0; | ||
589 | } | ||
590 | |||
591 | static nmi_callback_t nmi_callback = dummy_nmi_callback; | ||
592 | |||
593 | asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) | 864 | asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) |
594 | { | 865 | { |
595 | int cpu = safe_smp_processor_id(); | ||
596 | |||
597 | nmi_enter(); | 866 | nmi_enter(); |
598 | add_pda(__nmi_count,1); | 867 | add_pda(__nmi_count,1); |
599 | if (!rcu_dereference(nmi_callback)(regs, cpu)) | 868 | default_do_nmi(regs); |
600 | default_do_nmi(regs); | ||
601 | nmi_exit(); | 869 | nmi_exit(); |
602 | } | 870 | } |
603 | 871 | ||
604 | void set_nmi_callback(nmi_callback_t callback) | 872 | int do_nmi_callback(struct pt_regs * regs, int cpu) |
605 | { | 873 | { |
606 | vmalloc_sync_all(); | 874 | #ifdef CONFIG_SYSCTL |
607 | rcu_assign_pointer(nmi_callback, callback); | 875 | if (unknown_nmi_panic) |
608 | } | 876 | return unknown_nmi_panic_callback(regs, cpu); |
609 | EXPORT_SYMBOL_GPL(set_nmi_callback); | 877 | #endif |
610 | 878 | return 0; | |
611 | void unset_nmi_callback(void) | ||
612 | { | ||
613 | nmi_callback = dummy_nmi_callback; | ||
614 | } | 879 | } |
615 | EXPORT_SYMBOL_GPL(unset_nmi_callback); | ||
616 | 880 | ||
617 | #ifdef CONFIG_SYSCTL | 881 | #ifdef CONFIG_SYSCTL |
618 | 882 | ||
@@ -621,36 +885,42 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
621 | unsigned char reason = get_nmi_reason(); | 885 | unsigned char reason = get_nmi_reason(); |
622 | char buf[64]; | 886 | char buf[64]; |
623 | 887 | ||
624 | if (!(reason & 0xc0)) { | 888 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); |
625 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); | 889 | die_nmi(buf, regs, 1); /* Always panic here */ |
626 | die_nmi(buf,regs); | ||
627 | } | ||
628 | return 0; | 890 | return 0; |
629 | } | 891 | } |
630 | 892 | ||
631 | /* | 893 | /* |
632 | * proc handler for /proc/sys/kernel/unknown_nmi_panic | 894 | * proc handler for /proc/sys/kernel/nmi |
633 | */ | 895 | */ |
634 | int proc_unknown_nmi_panic(struct ctl_table *table, int write, struct file *file, | 896 | int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, |
635 | void __user *buffer, size_t *length, loff_t *ppos) | 897 | void __user *buffer, size_t *length, loff_t *ppos) |
636 | { | 898 | { |
637 | int old_state; | 899 | int old_state; |
638 | 900 | ||
639 | old_state = unknown_nmi_panic; | 901 | nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; |
902 | old_state = nmi_watchdog_enabled; | ||
640 | proc_dointvec(table, write, file, buffer, length, ppos); | 903 | proc_dointvec(table, write, file, buffer, length, ppos); |
641 | if (!!old_state == !!unknown_nmi_panic) | 904 | if (!!old_state == !!nmi_watchdog_enabled) |
642 | return 0; | 905 | return 0; |
643 | 906 | ||
644 | if (unknown_nmi_panic) { | 907 | if (atomic_read(&nmi_active) < 0) { |
645 | if (reserve_lapic_nmi() < 0) { | 908 | printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); |
646 | unknown_nmi_panic = 0; | 909 | return -EIO; |
647 | return -EBUSY; | 910 | } |
648 | } else { | 911 | |
649 | set_nmi_callback(unknown_nmi_panic_callback); | 912 | /* if nmi_watchdog is not set yet, then set it */ |
650 | } | 913 | nmi_watchdog_default(); |
914 | |||
915 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
916 | if (nmi_watchdog_enabled) | ||
917 | enable_lapic_nmi_watchdog(); | ||
918 | else | ||
919 | disable_lapic_nmi_watchdog(); | ||
651 | } else { | 920 | } else { |
652 | release_lapic_nmi(); | 921 | printk( KERN_WARNING |
653 | unset_nmi_callback(); | 922 | "NMI watchdog doesn't know what hardware to touch\n"); |
923 | return -EIO; | ||
654 | } | 924 | } |
655 | return 0; | 925 | return 0; |
656 | } | 926 | } |
@@ -659,8 +929,12 @@ int proc_unknown_nmi_panic(struct ctl_table *table, int write, struct file *file
659 | 929 | ||
660 | EXPORT_SYMBOL(nmi_active); | 930 | EXPORT_SYMBOL(nmi_active); |
661 | EXPORT_SYMBOL(nmi_watchdog); | 931 | EXPORT_SYMBOL(nmi_watchdog); |
662 | EXPORT_SYMBOL(reserve_lapic_nmi); | 932 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); |
663 | EXPORT_SYMBOL(release_lapic_nmi); | 933 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); |
934 | EXPORT_SYMBOL(reserve_perfctr_nmi); | ||
935 | EXPORT_SYMBOL(release_perfctr_nmi); | ||
936 | EXPORT_SYMBOL(reserve_evntsel_nmi); | ||
937 | EXPORT_SYMBOL(release_evntsel_nmi); | ||
664 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); | 938 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); |
665 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); | 939 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); |
666 | EXPORT_SYMBOL(touch_nmi_watchdog); | 940 | EXPORT_SYMBOL(touch_nmi_watchdog); |
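
The hunks above replace the old global lapic_nmi_owner/LAPIC_NMI_* scheme with per-CPU reservation bitmaps and export reserve_perfctr_nmi(), reserve_evntsel_nmi() and their release counterparts for other users of the performance counters (the "hack for oprofile" noted in the code). As a rough illustration of the intended calling convention -- a hypothetical sketch, not part of this commit -- a driver wanting the AMD K7 counter pair could do something like the following; the my_driver_* names are invented, and the header locations for the prototypes and MSR constants are assumptions:

/* Hypothetical illustration only -- not part of the patch above. */
#include <linux/errno.h>
#include <asm/msr.h>	/* MSR_K7_PERFCTR0 / MSR_K7_EVNTSEL0 (assumed location) */
#include <asm/nmi.h>	/* reserve/release prototypes (assumed location) */

static int my_driver_grab_k7_counter(void)
{
	/* the reserve_*_nmi() helpers return 1 on success, 0 if the bit is taken */
	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
		return -EBUSY;		/* the NMI watchdog (or someone else) owns it */
	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
		release_perfctr_nmi(MSR_K7_PERFCTR0);
		return -EBUSY;
	}
	/* ... program MSR_K7_EVNTSEL0 / MSR_K7_PERFCTR0 here ... */
	return 0;
}

static void my_driver_release_k7_counter(void)
{
	release_evntsel_nmi(MSR_K7_EVNTSEL0);
	release_perfctr_nmi(MSR_K7_PERFCTR0);
}

Note that perfctr_nmi_owner and evntsel_nmi_owner are DEFINE_PER_CPU, so a reservation only covers the CPU it is made on; code that programs counters on every CPU repeats the call per CPU, much as the watchdog itself now does through on_each_cpu(setup_apic_nmi_watchdog, ...).

A worked example of the new nmi_hz clamp in check_nmi_watchdog(): architectural-perfmon counters sign-extend from bit 31, so only 31 bits are usefully writable. On a 3 GHz CPU, (u64)cpu_khz * 1000 = 3,000,000,000, which exceeds 0x7fffffff, so nmi_hz becomes 3,000,000,000 / 0x7fffffff + 1 = 2 and the counter is reloaded with -(3,000,000,000 / 2) = -1,500,000,000, which fits in the writable range.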