diff options
author | Jeff Garzik <jeff@garzik.org> | 2006-09-27 18:13:53 -0400 |
---|---|---|
committer | Jeff Garzik <jeff@garzik.org> | 2006-09-27 18:13:53 -0400 |
commit | aebb1153ac54ddbbd3d3f0481a193f4bf0ead53b (patch) | |
tree | 57425aa83c8bed5b41af7e3408024fe1f2fdded9 /arch/i386/kernel/nmi.c | |
parent | 022e7a12b6aa11a11de4d708fe8606c9a6734b37 (diff) | |
parent | a77c64c1a641950626181b4857abb701d8f38ccc (diff) |
Merge branch 'master' into upstream
Diffstat (limited to 'arch/i386/kernel/nmi.c')
-rw-r--r-- | arch/i386/kernel/nmi.c | 940 |
1 files changed, 651 insertions, 289 deletions
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index acb351478e42..dbda706fdd14 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
@@ -21,83 +21,174 @@ | |||
21 | #include <linux/sysdev.h> | 21 | #include <linux/sysdev.h> |
22 | #include <linux/sysctl.h> | 22 | #include <linux/sysctl.h> |
23 | #include <linux/percpu.h> | 23 | #include <linux/percpu.h> |
24 | #include <linux/dmi.h> | ||
25 | #include <linux/kprobes.h> | ||
24 | 26 | ||
25 | #include <asm/smp.h> | 27 | #include <asm/smp.h> |
26 | #include <asm/nmi.h> | 28 | #include <asm/nmi.h> |
29 | #include <asm/kdebug.h> | ||
27 | #include <asm/intel_arch_perfmon.h> | 30 | #include <asm/intel_arch_perfmon.h> |
28 | 31 | ||
29 | #include "mach_traps.h" | 32 | #include "mach_traps.h" |
30 | 33 | ||
31 | unsigned int nmi_watchdog = NMI_NONE; | 34 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: |
32 | extern int unknown_nmi_panic; | 35 | * evntsel_nmi_owner tracks the ownership of the event selection |
33 | static unsigned int nmi_hz = HZ; | 36 | * - different performance counters/ event selection may be reserved for |
34 | static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ | 37 | * different subsystems this reservation system just tries to coordinate |
35 | static unsigned int nmi_p4_cccr_val; | 38 | * things a little |
36 | extern void show_registers(struct pt_regs *regs); | 39 | */ |
40 | static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); | ||
41 | static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); | ||
37 | 42 | ||
38 | /* | 43 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its |
39 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: | 44 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) |
40 | * - it may be reserved by some other driver, or not | ||
41 | * - when not reserved by some other driver, it may be used for | ||
42 | * the NMI watchdog, or not | ||
43 | * | ||
44 | * This is maintained separately from nmi_active because the NMI | ||
45 | * watchdog may also be driven from the I/O APIC timer. | ||
46 | */ | 45 | */ |
47 | static DEFINE_SPINLOCK(lapic_nmi_owner_lock); | 46 | #define NMI_MAX_COUNTER_BITS 66 |
48 | static unsigned int lapic_nmi_owner; | ||
49 | #define LAPIC_NMI_WATCHDOG (1<<0) | ||
50 | #define LAPIC_NMI_RESERVED (1<<1) | ||
51 | 47 | ||
52 | /* nmi_active: | 48 | /* nmi_active: |
53 | * +1: the lapic NMI watchdog is active, but can be disabled | 49 | * >0: the lapic NMI watchdog is active, but can be disabled |
54 | * 0: the lapic NMI watchdog has not been set up, and cannot | 50 | * <0: the lapic NMI watchdog has not been set up, and cannot |
55 | * be enabled | 51 | * be enabled |
56 | * -1: the lapic NMI watchdog is disabled, but can be enabled | 52 | * 0: the lapic NMI watchdog is disabled, but can be enabled |
57 | */ | 53 | */ |
58 | int nmi_active; | 54 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ |
59 | 55 | ||
60 | #define K7_EVNTSEL_ENABLE (1 << 22) | 56 | unsigned int nmi_watchdog = NMI_DEFAULT; |
61 | #define K7_EVNTSEL_INT (1 << 20) | 57 | static unsigned int nmi_hz = HZ; |
62 | #define K7_EVNTSEL_OS (1 << 17) | ||
63 | #define K7_EVNTSEL_USR (1 << 16) | ||
64 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
65 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
66 | 58 | ||
67 | #define P6_EVNTSEL0_ENABLE (1 << 22) | 59 | struct nmi_watchdog_ctlblk { |
68 | #define P6_EVNTSEL_INT (1 << 20) | 60 | int enabled; |
69 | #define P6_EVNTSEL_OS (1 << 17) | 61 | u64 check_bit; |
70 | #define P6_EVNTSEL_USR (1 << 16) | 62 | unsigned int cccr_msr; |
71 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | 63 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ |
72 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | 64 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ |
65 | }; | ||
66 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
73 | 67 | ||
74 | #define MSR_P4_MISC_ENABLE 0x1A0 | 68 | /* local prototypes */ |
75 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | 69 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); |
76 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) | ||
77 | #define MSR_P4_PERFCTR0 0x300 | ||
78 | #define MSR_P4_CCCR0 0x360 | ||
79 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
80 | #define P4_ESCR_OS (1<<3) | ||
81 | #define P4_ESCR_USR (1<<2) | ||
82 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
83 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
84 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
85 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
86 | #define P4_CCCR_COMPARE (1<<18) | ||
87 | #define P4_CCCR_REQUIRED (3<<16) | ||
88 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
89 | #define P4_CCCR_ENABLE (1<<12) | ||
90 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
91 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
92 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
93 | #define MSR_P4_IQ_COUNTER0 0x30C | ||
94 | #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) | ||
95 | #define P4_NMI_IQ_CCCR0 \ | ||
96 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ | ||
97 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) | ||
98 | 70 | ||
99 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | 71 | extern void show_registers(struct pt_regs *regs); |
100 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | 72 | extern int unknown_nmi_panic; |
73 | |||
74 | /* converts an msr to an appropriate reservation bit */ | ||
75 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | ||
76 | { | ||
77 | /* returns the bit offset of the performance counter register */ | ||
78 | switch (boot_cpu_data.x86_vendor) { | ||
79 | case X86_VENDOR_AMD: | ||
80 | return (msr - MSR_K7_PERFCTR0); | ||
81 | case X86_VENDOR_INTEL: | ||
82 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
83 | return (msr - MSR_ARCH_PERFMON_PERFCTR0); | ||
84 | |||
85 | switch (boot_cpu_data.x86) { | ||
86 | case 6: | ||
87 | return (msr - MSR_P6_PERFCTR0); | ||
88 | case 15: | ||
89 | return (msr - MSR_P4_BPU_PERFCTR0); | ||
90 | } | ||
91 | } | ||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | /* converts an msr to an appropriate reservation bit */ | ||
96 | static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | ||
97 | { | ||
98 | /* returns the bit offset of the event selection register */ | ||
99 | switch (boot_cpu_data.x86_vendor) { | ||
100 | case X86_VENDOR_AMD: | ||
101 | return (msr - MSR_K7_EVNTSEL0); | ||
102 | case X86_VENDOR_INTEL: | ||
103 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
104 | return (msr - MSR_ARCH_PERFMON_EVENTSEL0); | ||
105 | |||
106 | switch (boot_cpu_data.x86) { | ||
107 | case 6: | ||
108 | return (msr - MSR_P6_EVNTSEL0); | ||
109 | case 15: | ||
110 | return (msr - MSR_P4_BSU_ESCR0); | ||
111 | } | ||
112 | } | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | /* checks for a bit availability (hack for oprofile) */ | ||
117 | int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | ||
118 | { | ||
119 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
120 | |||
121 | return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); | ||
122 | } | ||
123 | |||
124 | /* checks an msr for availability */ | ||
125 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
126 | { | ||
127 | unsigned int counter; | ||
128 | |||
129 | counter = nmi_perfctr_msr_to_bit(msr); | ||
130 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
131 | |||
132 | return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); | ||
133 | } | ||
134 | |||
135 | int reserve_perfctr_nmi(unsigned int msr) | ||
136 | { | ||
137 | unsigned int counter; | ||
138 | |||
139 | counter = nmi_perfctr_msr_to_bit(msr); | ||
140 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
141 | |||
142 | if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner))) | ||
143 | return 1; | ||
144 | return 0; | ||
145 | } | ||
146 | |||
147 | void release_perfctr_nmi(unsigned int msr) | ||
148 | { | ||
149 | unsigned int counter; | ||
150 | |||
151 | counter = nmi_perfctr_msr_to_bit(msr); | ||
152 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
153 | |||
154 | clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner)); | ||
155 | } | ||
156 | |||
157 | int reserve_evntsel_nmi(unsigned int msr) | ||
158 | { | ||
159 | unsigned int counter; | ||
160 | |||
161 | counter = nmi_evntsel_msr_to_bit(msr); | ||
162 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
163 | |||
164 | if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0])) | ||
165 | return 1; | ||
166 | return 0; | ||
167 | } | ||
168 | |||
169 | void release_evntsel_nmi(unsigned int msr) | ||
170 | { | ||
171 | unsigned int counter; | ||
172 | |||
173 | counter = nmi_evntsel_msr_to_bit(msr); | ||
174 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
175 | |||
176 | clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]); | ||
177 | } | ||
178 | |||
179 | static __cpuinit inline int nmi_known_cpu(void) | ||
180 | { | ||
181 | switch (boot_cpu_data.x86_vendor) { | ||
182 | case X86_VENDOR_AMD: | ||
183 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | ||
184 | case X86_VENDOR_INTEL: | ||
185 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
186 | return 1; | ||
187 | else | ||
188 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | ||
189 | } | ||
190 | return 0; | ||
191 | } | ||
101 | 192 | ||
102 | #ifdef CONFIG_SMP | 193 | #ifdef CONFIG_SMP |
103 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when | 194 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when |
@@ -125,7 +216,18 @@ static int __init check_nmi_watchdog(void) | |||
125 | unsigned int *prev_nmi_count; | 216 | unsigned int *prev_nmi_count; |
126 | int cpu; | 217 | int cpu; |
127 | 218 | ||
128 | if (nmi_watchdog == NMI_NONE) | 219 | /* Enable NMI watchdog for newer systems. |
220 | Actually it should be safe for most systems before 2004 too except | ||
221 | for some IBM systems that corrupt registers when NMI happens | ||
222 | during SMM. Unfortunately we don't have more exact information | ||
223 | on these and use this coarse check. */ | ||
224 | if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004) | ||
225 | nmi_watchdog = NMI_LOCAL_APIC; | ||
226 | |||
227 | if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) | ||
228 | return 0; | ||
229 | |||
230 | if (!atomic_read(&nmi_active)) | ||
129 | return 0; | 231 | return 0; |
130 | 232 | ||
131 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); | 233 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
@@ -149,25 +251,45 @@ static int __init check_nmi_watchdog(void) | |||
149 | if (!cpu_isset(cpu, cpu_callin_map)) | 251 | if (!cpu_isset(cpu, cpu_callin_map)) |
150 | continue; | 252 | continue; |
151 | #endif | 253 | #endif |
254 | if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) | ||
255 | continue; | ||
152 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { | 256 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { |
153 | endflag = 1; | ||
154 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | 257 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", |
155 | cpu, | 258 | cpu, |
156 | prev_nmi_count[cpu], | 259 | prev_nmi_count[cpu], |
157 | nmi_count(cpu)); | 260 | nmi_count(cpu)); |
158 | nmi_active = 0; | 261 | per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; |
159 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; | 262 | atomic_dec(&nmi_active); |
160 | kfree(prev_nmi_count); | ||
161 | return -1; | ||
162 | } | 263 | } |
163 | } | 264 | } |
265 | if (!atomic_read(&nmi_active)) { | ||
266 | kfree(prev_nmi_count); | ||
267 | atomic_set(&nmi_active, -1); | ||
268 | return -1; | ||
269 | } | ||
164 | endflag = 1; | 270 | endflag = 1; |
165 | printk("OK.\n"); | 271 | printk("OK.\n"); |
166 | 272 | ||
167 | /* now that we know it works we can reduce NMI frequency to | 273 | /* now that we know it works we can reduce NMI frequency to |
168 | something more reasonable; makes a difference in some configs */ | 274 | something more reasonable; makes a difference in some configs */ |
169 | if (nmi_watchdog == NMI_LOCAL_APIC) | 275 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
276 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
277 | |||
170 | nmi_hz = 1; | 278 | nmi_hz = 1; |
279 | /* | ||
280 | * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter | ||
281 | * are writable, with higher bits sign extending from bit 31. | ||
282 | * So, we can only program the counter with 31 bit values and | ||
283 | * 32nd bit should be 1, for 33.. to be 1. | ||
284 | * Find the appropriate nmi_hz | ||
285 | */ | ||
286 | if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 && | ||
287 | ((u64)cpu_khz * 1000) > 0x7fffffffULL) { | ||
288 | u64 count = (u64)cpu_khz * 1000; | ||
289 | do_div(count, 0x7fffffffUL); | ||
290 | nmi_hz = count + 1; | ||
291 | } | ||
292 | } | ||
171 | 293 | ||
172 | kfree(prev_nmi_count); | 294 | kfree(prev_nmi_count); |
173 | return 0; | 295 | return 0; |
@@ -181,124 +303,70 @@ static int __init setup_nmi_watchdog(char *str) | |||
181 | 303 | ||
182 | get_option(&str, &nmi); | 304 | get_option(&str, &nmi); |
183 | 305 | ||
184 | if (nmi >= NMI_INVALID) | 306 | if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) |
185 | return 0; | 307 | return 0; |
186 | if (nmi == NMI_NONE) | ||
187 | nmi_watchdog = nmi; | ||
188 | /* | 308 | /* |
189 | * If any other x86 CPU has a local APIC, then | 309 | * If any other x86 CPU has a local APIC, then |
190 | * please test the NMI stuff there and send me the | 310 | * please test the NMI stuff there and send me the |
191 | * missing bits. Right now Intel P6/P4 and AMD K7 only. | 311 | * missing bits. Right now Intel P6/P4 and AMD K7 only. |
192 | */ | 312 | */ |
193 | if ((nmi == NMI_LOCAL_APIC) && | 313 | if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0)) |
194 | (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | 314 | return 0; /* no lapic support */ |
195 | (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) | 315 | nmi_watchdog = nmi; |
196 | nmi_watchdog = nmi; | ||
197 | if ((nmi == NMI_LOCAL_APIC) && | ||
198 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && | ||
199 | (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) | ||
200 | nmi_watchdog = nmi; | ||
201 | /* | ||
202 | * We can enable the IO-APIC watchdog | ||
203 | * unconditionally. | ||
204 | */ | ||
205 | if (nmi == NMI_IO_APIC) { | ||
206 | nmi_active = 1; | ||
207 | nmi_watchdog = nmi; | ||
208 | } | ||
209 | return 1; | 316 | return 1; |
210 | } | 317 | } |
211 | 318 | ||
212 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 319 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
213 | 320 | ||
214 | static void disable_intel_arch_watchdog(void); | ||
215 | |||
216 | static void disable_lapic_nmi_watchdog(void) | 321 | static void disable_lapic_nmi_watchdog(void) |
217 | { | 322 | { |
218 | if (nmi_active <= 0) | 323 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); |
324 | |||
325 | if (atomic_read(&nmi_active) <= 0) | ||
219 | return; | 326 | return; |
220 | switch (boot_cpu_data.x86_vendor) { | ||
221 | case X86_VENDOR_AMD: | ||
222 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); | ||
223 | break; | ||
224 | case X86_VENDOR_INTEL: | ||
225 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
226 | disable_intel_arch_watchdog(); | ||
227 | break; | ||
228 | } | ||
229 | switch (boot_cpu_data.x86) { | ||
230 | case 6: | ||
231 | if (boot_cpu_data.x86_model > 0xd) | ||
232 | break; | ||
233 | 327 | ||
234 | wrmsr(MSR_P6_EVNTSEL0, 0, 0); | 328 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); |
235 | break; | ||
236 | case 15: | ||
237 | if (boot_cpu_data.x86_model > 0x4) | ||
238 | break; | ||
239 | 329 | ||
240 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); | 330 | BUG_ON(atomic_read(&nmi_active) != 0); |
241 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); | ||
242 | break; | ||
243 | } | ||
244 | break; | ||
245 | } | ||
246 | nmi_active = -1; | ||
247 | /* tell do_nmi() and others that we're not active any more */ | ||
248 | nmi_watchdog = 0; | ||
249 | } | 331 | } |
250 | 332 | ||
251 | static void enable_lapic_nmi_watchdog(void) | 333 | static void enable_lapic_nmi_watchdog(void) |
252 | { | 334 | { |
253 | if (nmi_active < 0) { | 335 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); |
254 | nmi_watchdog = NMI_LOCAL_APIC; | ||
255 | setup_apic_nmi_watchdog(); | ||
256 | } | ||
257 | } | ||
258 | 336 | ||
259 | int reserve_lapic_nmi(void) | 337 | /* are we already enabled */ |
260 | { | 338 | if (atomic_read(&nmi_active) != 0) |
261 | unsigned int old_owner; | 339 | return; |
262 | |||
263 | spin_lock(&lapic_nmi_owner_lock); | ||
264 | old_owner = lapic_nmi_owner; | ||
265 | lapic_nmi_owner |= LAPIC_NMI_RESERVED; | ||
266 | spin_unlock(&lapic_nmi_owner_lock); | ||
267 | if (old_owner & LAPIC_NMI_RESERVED) | ||
268 | return -EBUSY; | ||
269 | if (old_owner & LAPIC_NMI_WATCHDOG) | ||
270 | disable_lapic_nmi_watchdog(); | ||
271 | return 0; | ||
272 | } | ||
273 | 340 | ||
274 | void release_lapic_nmi(void) | 341 | /* are we lapic aware */ |
275 | { | 342 | if (nmi_known_cpu() <= 0) |
276 | unsigned int new_owner; | 343 | return; |
277 | 344 | ||
278 | spin_lock(&lapic_nmi_owner_lock); | 345 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); |
279 | new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; | 346 | touch_nmi_watchdog(); |
280 | lapic_nmi_owner = new_owner; | ||
281 | spin_unlock(&lapic_nmi_owner_lock); | ||
282 | if (new_owner & LAPIC_NMI_WATCHDOG) | ||
283 | enable_lapic_nmi_watchdog(); | ||
284 | } | 347 | } |
285 | 348 | ||
286 | void disable_timer_nmi_watchdog(void) | 349 | void disable_timer_nmi_watchdog(void) |
287 | { | 350 | { |
288 | if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) | 351 | BUG_ON(nmi_watchdog != NMI_IO_APIC); |
352 | |||
353 | if (atomic_read(&nmi_active) <= 0) | ||
289 | return; | 354 | return; |
290 | 355 | ||
291 | unset_nmi_callback(); | 356 | disable_irq(0); |
292 | nmi_active = -1; | 357 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); |
293 | nmi_watchdog = NMI_NONE; | 358 | |
359 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
294 | } | 360 | } |
295 | 361 | ||
296 | void enable_timer_nmi_watchdog(void) | 362 | void enable_timer_nmi_watchdog(void) |
297 | { | 363 | { |
298 | if (nmi_active < 0) { | 364 | BUG_ON(nmi_watchdog != NMI_IO_APIC); |
299 | nmi_watchdog = NMI_IO_APIC; | 365 | |
366 | if (atomic_read(&nmi_active) == 0) { | ||
300 | touch_nmi_watchdog(); | 367 | touch_nmi_watchdog(); |
301 | nmi_active = 1; | 368 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); |
369 | enable_irq(0); | ||
302 | } | 370 | } |
303 | } | 371 | } |
304 | 372 | ||
@@ -308,15 +376,20 @@ static int nmi_pm_active; /* nmi_active before suspend */ | |||
308 | 376 | ||
309 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) | 377 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) |
310 | { | 378 | { |
311 | nmi_pm_active = nmi_active; | 379 | /* only CPU0 goes here, other CPUs should be offline */ |
312 | disable_lapic_nmi_watchdog(); | 380 | nmi_pm_active = atomic_read(&nmi_active); |
381 | stop_apic_nmi_watchdog(NULL); | ||
382 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
313 | return 0; | 383 | return 0; |
314 | } | 384 | } |
315 | 385 | ||
316 | static int lapic_nmi_resume(struct sys_device *dev) | 386 | static int lapic_nmi_resume(struct sys_device *dev) |
317 | { | 387 | { |
318 | if (nmi_pm_active > 0) | 388 | /* only CPU0 goes here, other CPUs should be offline */ |
319 | enable_lapic_nmi_watchdog(); | 389 | if (nmi_pm_active > 0) { |
390 | setup_apic_nmi_watchdog(NULL); | ||
391 | touch_nmi_watchdog(); | ||
392 | } | ||
320 | return 0; | 393 | return 0; |
321 | } | 394 | } |
322 | 395 | ||
@@ -336,7 +409,13 @@ static int __init init_lapic_nmi_sysfs(void) | |||
336 | { | 409 | { |
337 | int error; | 410 | int error; |
338 | 411 | ||
339 | if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) | 412 | /* should really be a BUG_ON but b/c this is an |
413 | * init call, it just doesn't work. -dcz | ||
414 | */ | ||
415 | if (nmi_watchdog != NMI_LOCAL_APIC) | ||
416 | return 0; | ||
417 | |||
418 | if ( atomic_read(&nmi_active) < 0 ) | ||
340 | return 0; | 419 | return 0; |
341 | 420 | ||
342 | error = sysdev_class_register(&nmi_sysclass); | 421 | error = sysdev_class_register(&nmi_sysclass); |
@@ -354,138 +433,269 @@ late_initcall(init_lapic_nmi_sysfs); | |||
354 | * Original code written by Keith Owens. | 433 | * Original code written by Keith Owens. |
355 | */ | 434 | */ |
356 | 435 | ||
357 | static void clear_msr_range(unsigned int base, unsigned int n) | 436 | static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr) |
358 | { | ||
359 | unsigned int i; | ||
360 | |||
361 | for(i = 0; i < n; ++i) | ||
362 | wrmsr(base+i, 0, 0); | ||
363 | } | ||
364 | |||
365 | static void write_watchdog_counter(const char *descr) | ||
366 | { | 437 | { |
367 | u64 count = (u64)cpu_khz * 1000; | 438 | u64 count = (u64)cpu_khz * 1000; |
368 | 439 | ||
369 | do_div(count, nmi_hz); | 440 | do_div(count, nmi_hz); |
370 | if(descr) | 441 | if(descr) |
371 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | 442 | Dprintk("setting %s to -0x%08Lx\n", descr, count); |
372 | wrmsrl(nmi_perfctr_msr, 0 - count); | 443 | wrmsrl(perfctr_msr, 0 - count); |
373 | } | 444 | } |
374 | 445 | ||
375 | static void setup_k7_watchdog(void) | 446 | /* Note that these events don't tick when the CPU idles. This means |
447 | the frequency varies with CPU load. */ | ||
448 | |||
449 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
450 | #define K7_EVNTSEL_INT (1 << 20) | ||
451 | #define K7_EVNTSEL_OS (1 << 17) | ||
452 | #define K7_EVNTSEL_USR (1 << 16) | ||
453 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
454 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
455 | |||
456 | static int setup_k7_watchdog(void) | ||
376 | { | 457 | { |
458 | unsigned int perfctr_msr, evntsel_msr; | ||
377 | unsigned int evntsel; | 459 | unsigned int evntsel; |
460 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
461 | |||
462 | perfctr_msr = MSR_K7_PERFCTR0; | ||
463 | evntsel_msr = MSR_K7_EVNTSEL0; | ||
464 | if (!reserve_perfctr_nmi(perfctr_msr)) | ||
465 | goto fail; | ||
378 | 466 | ||
379 | nmi_perfctr_msr = MSR_K7_PERFCTR0; | 467 | if (!reserve_evntsel_nmi(evntsel_msr)) |
468 | goto fail1; | ||
380 | 469 | ||
381 | clear_msr_range(MSR_K7_EVNTSEL0, 4); | 470 | wrmsrl(perfctr_msr, 0UL); |
382 | clear_msr_range(MSR_K7_PERFCTR0, 4); | ||
383 | 471 | ||
384 | evntsel = K7_EVNTSEL_INT | 472 | evntsel = K7_EVNTSEL_INT |
385 | | K7_EVNTSEL_OS | 473 | | K7_EVNTSEL_OS |
386 | | K7_EVNTSEL_USR | 474 | | K7_EVNTSEL_USR |
387 | | K7_NMI_EVENT; | 475 | | K7_NMI_EVENT; |
388 | 476 | ||
389 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 477 | /* setup the timer */ |
390 | write_watchdog_counter("K7_PERFCTR0"); | 478 | wrmsr(evntsel_msr, evntsel, 0); |
479 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0"); | ||
391 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 480 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
392 | evntsel |= K7_EVNTSEL_ENABLE; | 481 | evntsel |= K7_EVNTSEL_ENABLE; |
393 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 482 | wrmsr(evntsel_msr, evntsel, 0); |
483 | |||
484 | wd->perfctr_msr = perfctr_msr; | ||
485 | wd->evntsel_msr = evntsel_msr; | ||
486 | wd->cccr_msr = 0; //unused | ||
487 | wd->check_bit = 1ULL<<63; | ||
488 | return 1; | ||
489 | fail1: | ||
490 | release_perfctr_nmi(perfctr_msr); | ||
491 | fail: | ||
492 | return 0; | ||
493 | } | ||
494 | |||
495 | static void stop_k7_watchdog(void) | ||
496 | { | ||
497 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
498 | |||
499 | wrmsr(wd->evntsel_msr, 0, 0); | ||
500 | |||
501 | release_evntsel_nmi(wd->evntsel_msr); | ||
502 | release_perfctr_nmi(wd->perfctr_msr); | ||
394 | } | 503 | } |
395 | 504 | ||
396 | static void setup_p6_watchdog(void) | 505 | #define P6_EVNTSEL0_ENABLE (1 << 22) |
506 | #define P6_EVNTSEL_INT (1 << 20) | ||
507 | #define P6_EVNTSEL_OS (1 << 17) | ||
508 | #define P6_EVNTSEL_USR (1 << 16) | ||
509 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | ||
510 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | ||
511 | |||
512 | static int setup_p6_watchdog(void) | ||
397 | { | 513 | { |
514 | unsigned int perfctr_msr, evntsel_msr; | ||
398 | unsigned int evntsel; | 515 | unsigned int evntsel; |
516 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
517 | |||
518 | perfctr_msr = MSR_P6_PERFCTR0; | ||
519 | evntsel_msr = MSR_P6_EVNTSEL0; | ||
520 | if (!reserve_perfctr_nmi(perfctr_msr)) | ||
521 | goto fail; | ||
399 | 522 | ||
400 | nmi_perfctr_msr = MSR_P6_PERFCTR0; | 523 | if (!reserve_evntsel_nmi(evntsel_msr)) |
524 | goto fail1; | ||
401 | 525 | ||
402 | clear_msr_range(MSR_P6_EVNTSEL0, 2); | 526 | wrmsrl(perfctr_msr, 0UL); |
403 | clear_msr_range(MSR_P6_PERFCTR0, 2); | ||
404 | 527 | ||
405 | evntsel = P6_EVNTSEL_INT | 528 | evntsel = P6_EVNTSEL_INT |
406 | | P6_EVNTSEL_OS | 529 | | P6_EVNTSEL_OS |
407 | | P6_EVNTSEL_USR | 530 | | P6_EVNTSEL_USR |
408 | | P6_NMI_EVENT; | 531 | | P6_NMI_EVENT; |
409 | 532 | ||
410 | wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); | 533 | /* setup the timer */ |
411 | write_watchdog_counter("P6_PERFCTR0"); | 534 | wrmsr(evntsel_msr, evntsel, 0); |
535 | write_watchdog_counter(perfctr_msr, "P6_PERFCTR0"); | ||
412 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 536 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
413 | evntsel |= P6_EVNTSEL0_ENABLE; | 537 | evntsel |= P6_EVNTSEL0_ENABLE; |
414 | wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); | 538 | wrmsr(evntsel_msr, evntsel, 0); |
539 | |||
540 | wd->perfctr_msr = perfctr_msr; | ||
541 | wd->evntsel_msr = evntsel_msr; | ||
542 | wd->cccr_msr = 0; //unused | ||
543 | wd->check_bit = 1ULL<<39; | ||
544 | return 1; | ||
545 | fail1: | ||
546 | release_perfctr_nmi(perfctr_msr); | ||
547 | fail: | ||
548 | return 0; | ||
549 | } | ||
550 | |||
551 | static void stop_p6_watchdog(void) | ||
552 | { | ||
553 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
554 | |||
555 | wrmsr(wd->evntsel_msr, 0, 0); | ||
556 | |||
557 | release_evntsel_nmi(wd->evntsel_msr); | ||
558 | release_perfctr_nmi(wd->perfctr_msr); | ||
415 | } | 559 | } |
416 | 560 | ||
561 | /* Note that these events don't tick when the CPU idles. This means | ||
562 | the frequency varies with CPU load. */ | ||
563 | |||
564 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | ||
565 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
566 | #define P4_ESCR_OS (1<<3) | ||
567 | #define P4_ESCR_USR (1<<2) | ||
568 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
569 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
570 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
571 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
572 | #define P4_CCCR_COMPARE (1<<18) | ||
573 | #define P4_CCCR_REQUIRED (3<<16) | ||
574 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
575 | #define P4_CCCR_ENABLE (1<<12) | ||
576 | #define P4_CCCR_OVF (1<<31) | ||
577 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
578 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
579 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
580 | |||
417 | static int setup_p4_watchdog(void) | 581 | static int setup_p4_watchdog(void) |
418 | { | 582 | { |
583 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | ||
584 | unsigned int evntsel, cccr_val; | ||
419 | unsigned int misc_enable, dummy; | 585 | unsigned int misc_enable, dummy; |
586 | unsigned int ht_num; | ||
587 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
420 | 588 | ||
421 | rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); | 589 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); |
422 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | 590 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) |
423 | return 0; | 591 | return 0; |
424 | 592 | ||
425 | nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; | ||
426 | nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; | ||
427 | #ifdef CONFIG_SMP | 593 | #ifdef CONFIG_SMP |
428 | if (smp_num_siblings == 2) | 594 | /* detect which hyperthread we are on */ |
429 | nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; | 595 | if (smp_num_siblings == 2) { |
596 | unsigned int ebx, apicid; | ||
597 | |||
598 | ebx = cpuid_ebx(1); | ||
599 | apicid = (ebx >> 24) & 0xff; | ||
600 | ht_num = apicid & 1; | ||
601 | } else | ||
430 | #endif | 602 | #endif |
603 | ht_num = 0; | ||
431 | 604 | ||
432 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) | 605 | /* performance counters are shared resources |
433 | clear_msr_range(0x3F1, 2); | 606 | * assign each hyperthread its own set |
434 | /* MSR 0x3F0 seems to have a default value of 0xFC00, but current | 607 | * (re-use the ESCR0 register, seems safe |
435 | docs doesn't fully define it, so leave it alone for now. */ | 608 | * and keeps the cccr_val the same) |
436 | if (boot_cpu_data.x86_model >= 0x3) { | 609 | */ |
437 | /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ | 610 | if (!ht_num) { |
438 | clear_msr_range(0x3A0, 26); | 611 | /* logical cpu 0 */ |
439 | clear_msr_range(0x3BC, 3); | 612 | perfctr_msr = MSR_P4_IQ_PERFCTR0; |
613 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
614 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
615 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
440 | } else { | 616 | } else { |
441 | clear_msr_range(0x3A0, 31); | 617 | /* logical cpu 1 */ |
618 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
619 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
620 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
621 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | ||
442 | } | 622 | } |
443 | clear_msr_range(0x3C0, 6); | 623 | |
444 | clear_msr_range(0x3C8, 6); | 624 | if (!reserve_perfctr_nmi(perfctr_msr)) |
445 | clear_msr_range(0x3E0, 2); | 625 | goto fail; |
446 | clear_msr_range(MSR_P4_CCCR0, 18); | 626 | |
447 | clear_msr_range(MSR_P4_PERFCTR0, 18); | 627 | if (!reserve_evntsel_nmi(evntsel_msr)) |
448 | 628 | goto fail1; | |
449 | wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); | 629 | |
450 | wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); | 630 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) |
451 | write_watchdog_counter("P4_IQ_COUNTER0"); | 631 | | P4_ESCR_OS |
632 | | P4_ESCR_USR; | ||
633 | |||
634 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
635 | | P4_CCCR_COMPLEMENT | ||
636 | | P4_CCCR_COMPARE | ||
637 | | P4_CCCR_REQUIRED; | ||
638 | |||
639 | wrmsr(evntsel_msr, evntsel, 0); | ||
640 | wrmsr(cccr_msr, cccr_val, 0); | ||
641 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0"); | ||
452 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 642 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
453 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 643 | cccr_val |= P4_CCCR_ENABLE; |
644 | wrmsr(cccr_msr, cccr_val, 0); | ||
645 | wd->perfctr_msr = perfctr_msr; | ||
646 | wd->evntsel_msr = evntsel_msr; | ||
647 | wd->cccr_msr = cccr_msr; | ||
648 | wd->check_bit = 1ULL<<39; | ||
454 | return 1; | 649 | return 1; |
650 | fail1: | ||
651 | release_perfctr_nmi(perfctr_msr); | ||
652 | fail: | ||
653 | return 0; | ||
455 | } | 654 | } |
456 | 655 | ||
457 | static void disable_intel_arch_watchdog(void) | 656 | static void stop_p4_watchdog(void) |
458 | { | 657 | { |
459 | unsigned ebx; | 658 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
460 | 659 | ||
461 | /* | 660 | wrmsr(wd->cccr_msr, 0, 0); |
462 | * Check whether the Architectural PerfMon supports | 661 | wrmsr(wd->evntsel_msr, 0, 0); |
463 | * Unhalted Core Cycles Event or not. | 662 | |
464 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | 663 | release_evntsel_nmi(wd->evntsel_msr); |
465 | */ | 664 | release_perfctr_nmi(wd->perfctr_msr); |
466 | ebx = cpuid_ebx(10); | ||
467 | if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
468 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); | ||
469 | } | 665 | } |
470 | 666 | ||
667 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
668 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
669 | |||
471 | static int setup_intel_arch_watchdog(void) | 670 | static int setup_intel_arch_watchdog(void) |
472 | { | 671 | { |
672 | unsigned int ebx; | ||
673 | union cpuid10_eax eax; | ||
674 | unsigned int unused; | ||
675 | unsigned int perfctr_msr, evntsel_msr; | ||
473 | unsigned int evntsel; | 676 | unsigned int evntsel; |
474 | unsigned ebx; | 677 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
475 | 678 | ||
476 | /* | 679 | /* |
477 | * Check whether the Architectural PerfMon supports | 680 | * Check whether the Architectural PerfMon supports |
478 | * Unhalted Core Cycles Event or not. | 681 | * Unhalted Core Cycles Event or not. |
479 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | 682 | * NOTE: Corresponding bit = 0 in ebx indicates event present. |
480 | */ | 683 | */ |
481 | ebx = cpuid_ebx(10); | 684 | cpuid(10, &(eax.full), &ebx, &unused, &unused); |
482 | if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | 685 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || |
483 | return 0; | 686 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) |
687 | goto fail; | ||
688 | |||
689 | perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | ||
690 | evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0; | ||
484 | 691 | ||
485 | nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | 692 | if (!reserve_perfctr_nmi(perfctr_msr)) |
693 | goto fail; | ||
486 | 694 | ||
487 | clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); | 695 | if (!reserve_evntsel_nmi(evntsel_msr)) |
488 | clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); | 696 | goto fail1; |
697 | |||
698 | wrmsrl(perfctr_msr, 0UL); | ||
489 | 699 | ||
490 | evntsel = ARCH_PERFMON_EVENTSEL_INT | 700 | evntsel = ARCH_PERFMON_EVENTSEL_INT |
491 | | ARCH_PERFMON_EVENTSEL_OS | 701 | | ARCH_PERFMON_EVENTSEL_OS |
@@ -493,51 +703,145 @@ static int setup_intel_arch_watchdog(void) | |||
493 | | ARCH_PERFMON_NMI_EVENT_SEL | 703 | | ARCH_PERFMON_NMI_EVENT_SEL |
494 | | ARCH_PERFMON_NMI_EVENT_UMASK; | 704 | | ARCH_PERFMON_NMI_EVENT_UMASK; |
495 | 705 | ||
496 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | 706 | /* setup the timer */ |
497 | write_watchdog_counter("INTEL_ARCH_PERFCTR0"); | 707 | wrmsr(evntsel_msr, evntsel, 0); |
708 | write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0"); | ||
498 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 709 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
499 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 710 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
500 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | 711 | wrmsr(evntsel_msr, evntsel, 0); |
712 | |||
713 | wd->perfctr_msr = perfctr_msr; | ||
714 | wd->evntsel_msr = evntsel_msr; | ||
715 | wd->cccr_msr = 0; //unused | ||
716 | wd->check_bit = 1ULL << (eax.split.bit_width - 1); | ||
501 | return 1; | 717 | return 1; |
718 | fail1: | ||
719 | release_perfctr_nmi(perfctr_msr); | ||
720 | fail: | ||
721 | return 0; | ||
502 | } | 722 | } |
503 | 723 | ||
504 | void setup_apic_nmi_watchdog (void) | 724 | static void stop_intel_arch_watchdog(void) |
505 | { | 725 | { |
506 | switch (boot_cpu_data.x86_vendor) { | 726 | unsigned int ebx; |
507 | case X86_VENDOR_AMD: | 727 | union cpuid10_eax eax; |
508 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) | 728 | unsigned int unused; |
509 | return; | 729 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
510 | setup_k7_watchdog(); | 730 | |
511 | break; | 731 | /* |
512 | case X86_VENDOR_INTEL: | 732 | * Check whether the Architectural PerfMon supports |
513 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 733 | * Unhalted Core Cycles Event or not. |
514 | if (!setup_intel_arch_watchdog()) | 734 | * NOTE: Corresponding bit = 0 in ebx indicates event present. |
735 | */ | ||
736 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
737 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
738 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
739 | return; | ||
740 | |||
741 | wrmsr(wd->evntsel_msr, 0, 0); | ||
742 | release_evntsel_nmi(wd->evntsel_msr); | ||
743 | release_perfctr_nmi(wd->perfctr_msr); | ||
744 | } | ||
745 | |||
746 | void setup_apic_nmi_watchdog (void *unused) | ||
747 | { | ||
748 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
749 | |||
750 | /* only support LOCAL and IO APICs for now */ | ||
751 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
752 | (nmi_watchdog != NMI_IO_APIC)) | ||
753 | return; | ||
754 | |||
755 | if (wd->enabled == 1) | ||
756 | return; | ||
757 | |||
758 | /* cheap hack to support suspend/resume */ | ||
759 | /* if cpu0 is not active neither should the other cpus */ | ||
760 | if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) | ||
761 | return; | ||
762 | |||
763 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
764 | switch (boot_cpu_data.x86_vendor) { | ||
765 | case X86_VENDOR_AMD: | ||
766 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) | ||
515 | return; | 767 | return; |
516 | break; | 768 | if (!setup_k7_watchdog()) |
517 | } | ||
518 | switch (boot_cpu_data.x86) { | ||
519 | case 6: | ||
520 | if (boot_cpu_data.x86_model > 0xd) | ||
521 | return; | 769 | return; |
522 | |||
523 | setup_p6_watchdog(); | ||
524 | break; | 770 | break; |
525 | case 15: | 771 | case X86_VENDOR_INTEL: |
526 | if (boot_cpu_data.x86_model > 0x4) | 772 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
527 | return; | 773 | if (!setup_intel_arch_watchdog()) |
774 | return; | ||
775 | break; | ||
776 | } | ||
777 | switch (boot_cpu_data.x86) { | ||
778 | case 6: | ||
779 | if (boot_cpu_data.x86_model > 0xd) | ||
780 | return; | ||
781 | |||
782 | if (!setup_p6_watchdog()) | ||
783 | return; | ||
784 | break; | ||
785 | case 15: | ||
786 | if (boot_cpu_data.x86_model > 0x4) | ||
787 | return; | ||
528 | 788 | ||
529 | if (!setup_p4_watchdog()) | 789 | if (!setup_p4_watchdog()) |
790 | return; | ||
791 | break; | ||
792 | default: | ||
530 | return; | 793 | return; |
794 | } | ||
531 | break; | 795 | break; |
532 | default: | 796 | default: |
533 | return; | 797 | return; |
534 | } | 798 | } |
535 | break; | 799 | } |
536 | default: | 800 | wd->enabled = 1; |
801 | atomic_inc(&nmi_active); | ||
802 | } | ||
803 | |||
804 | void stop_apic_nmi_watchdog(void *unused) | ||
805 | { | ||
806 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
807 | |||
808 | /* only support LOCAL and IO APICs for now */ | ||
809 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
810 | (nmi_watchdog != NMI_IO_APIC)) | ||
811 | return; | ||
812 | |||
813 | if (wd->enabled == 0) | ||
537 | return; | 814 | return; |
815 | |||
816 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
817 | switch (boot_cpu_data.x86_vendor) { | ||
818 | case X86_VENDOR_AMD: | ||
819 | stop_k7_watchdog(); | ||
820 | break; | ||
821 | case X86_VENDOR_INTEL: | ||
822 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
823 | stop_intel_arch_watchdog(); | ||
824 | break; | ||
825 | } | ||
826 | switch (boot_cpu_data.x86) { | ||
827 | case 6: | ||
828 | if (boot_cpu_data.x86_model > 0xd) | ||
829 | break; | ||
830 | stop_p6_watchdog(); | ||
831 | break; | ||
832 | case 15: | ||
833 | if (boot_cpu_data.x86_model > 0x4) | ||
834 | break; | ||
835 | stop_p4_watchdog(); | ||
836 | break; | ||
837 | } | ||
838 | break; | ||
839 | default: | ||
840 | return; | ||
841 | } | ||
538 | } | 842 | } |
539 | lapic_nmi_owner = LAPIC_NMI_WATCHDOG; | 843 | wd->enabled = 0; |
540 | nmi_active = 1; | 844 | atomic_dec(&nmi_active); |
541 | } | 845 | } |
542 | 846 | ||
543 | /* | 847 | /* |
@@ -579,7 +883,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog); | |||
579 | 883 | ||
580 | extern void die_nmi(struct pt_regs *, const char *msg); | 884 | extern void die_nmi(struct pt_regs *, const char *msg); |
581 | 885 | ||
582 | void nmi_watchdog_tick (struct pt_regs * regs) | 886 | __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) |
583 | { | 887 | { |
584 | 888 | ||
585 | /* | 889 | /* |
@@ -588,11 +892,23 @@ void nmi_watchdog_tick (struct pt_regs * regs) | |||
588 | * smp_processor_id(). | 892 | * smp_processor_id(). |
589 | */ | 893 | */ |
590 | unsigned int sum; | 894 | unsigned int sum; |
895 | int touched = 0; | ||
591 | int cpu = smp_processor_id(); | 896 | int cpu = smp_processor_id(); |
897 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
898 | u64 dummy; | ||
899 | int rc=0; | ||
900 | |||
901 | /* check for other users first */ | ||
902 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | ||
903 | == NOTIFY_STOP) { | ||
904 | rc = 1; | ||
905 | touched = 1; | ||
906 | } | ||
592 | 907 | ||
593 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; | 908 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; |
594 | 909 | ||
595 | if (last_irq_sums[cpu] == sum) { | 910 | /* if the apic timer isn't firing, this cpu isn't doing much */ |
911 | if (!touched && last_irq_sums[cpu] == sum) { | ||
596 | /* | 912 | /* |
597 | * Ayiee, looks like this CPU is stuck ... | 913 | * Ayiee, looks like this CPU is stuck ... |
598 | * wait a few IRQs (5 seconds) before doing the oops ... | 914 | * wait a few IRQs (5 seconds) before doing the oops ... |
@@ -607,27 +923,59 @@ void nmi_watchdog_tick (struct pt_regs * regs) | |||
607 | last_irq_sums[cpu] = sum; | 923 | last_irq_sums[cpu] = sum; |
608 | alert_counter[cpu] = 0; | 924 | alert_counter[cpu] = 0; |
609 | } | 925 | } |
610 | if (nmi_perfctr_msr) { | 926 | /* see if the nmi watchdog went off */ |
611 | if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { | 927 | if (wd->enabled) { |
612 | /* | 928 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
613 | * P4 quirks: | 929 | rdmsrl(wd->perfctr_msr, dummy); |
614 | * - An overflown perfctr will assert its interrupt | 930 | if (dummy & wd->check_bit){ |
615 | * until the OVF flag in its CCCR is cleared. | 931 | /* this wasn't a watchdog timer interrupt */ |
616 | * - LVTPC is masked on interrupt and must be | 932 | goto done; |
617 | * unmasked by the LVTPC handler. | 933 | } |
934 | |||
935 | /* only Intel P4 uses the cccr msr */ | ||
936 | if (wd->cccr_msr != 0) { | ||
937 | /* | ||
938 | * P4 quirks: | ||
939 | * - An overflown perfctr will assert its interrupt | ||
940 | * until the OVF flag in its CCCR is cleared. | ||
941 | * - LVTPC is masked on interrupt and must be | ||
942 | * unmasked by the LVTPC handler. | ||
943 | */ | ||
944 | rdmsrl(wd->cccr_msr, dummy); | ||
945 | dummy &= ~P4_CCCR_OVF; | ||
946 | wrmsrl(wd->cccr_msr, dummy); | ||
947 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
948 | } | ||
949 | else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | ||
950 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
951 | /* P6 based Pentium M need to re-unmask | ||
952 | * the apic vector but it doesn't hurt | ||
953 | * other P6 variant. | ||
954 | * ArchPerfom/Core Duo also needs this */ | ||
955 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
956 | } | ||
957 | /* start the cycle over again */ | ||
958 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
959 | rc = 1; | ||
960 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
961 | /* don't know how to accurately check for this. | ||
962 | * just assume it was a watchdog timer interrupt | ||
963 | * This matches the old behaviour. | ||
618 | */ | 964 | */ |
619 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 965 | rc = 1; |
620 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
621 | } | 966 | } |
622 | else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 || | ||
623 | nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
624 | /* Only P6 based Pentium M need to re-unmask | ||
625 | * the apic vector but it doesn't hurt | ||
626 | * other P6 variant */ | ||
627 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
628 | } | ||
629 | write_watchdog_counter(NULL); | ||
630 | } | 967 | } |
968 | done: | ||
969 | return rc; | ||
970 | } | ||
971 | |||
972 | int do_nmi_callback(struct pt_regs * regs, int cpu) | ||
973 | { | ||
974 | #ifdef CONFIG_SYSCTL | ||
975 | if (unknown_nmi_panic) | ||
976 | return unknown_nmi_panic_callback(regs, cpu); | ||
977 | #endif | ||
978 | return 0; | ||
631 | } | 979 | } |
632 | 980 | ||
633 | #ifdef CONFIG_SYSCTL | 981 | #ifdef CONFIG_SYSCTL |
@@ -637,36 +985,46 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | |||
637 | unsigned char reason = get_nmi_reason(); | 985 | unsigned char reason = get_nmi_reason(); |
638 | char buf[64]; | 986 | char buf[64]; |
639 | 987 | ||
640 | if (!(reason & 0xc0)) { | 988 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); |
641 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); | 989 | die_nmi(regs, buf); |
642 | die_nmi(regs, buf); | ||
643 | } | ||
644 | return 0; | 990 | return 0; |
645 | } | 991 | } |
646 | 992 | ||
647 | /* | 993 | /* |
648 | * proc handler for /proc/sys/kernel/unknown_nmi_panic | 994 | * proc handler for /proc/sys/kernel/nmi |
649 | */ | 995 | */ |
650 | int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, | 996 | int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, |
651 | void __user *buffer, size_t *length, loff_t *ppos) | 997 | void __user *buffer, size_t *length, loff_t *ppos) |
652 | { | 998 | { |
653 | int old_state; | 999 | int old_state; |
654 | 1000 | ||
655 | old_state = unknown_nmi_panic; | 1001 | nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; |
1002 | old_state = nmi_watchdog_enabled; | ||
656 | proc_dointvec(table, write, file, buffer, length, ppos); | 1003 | proc_dointvec(table, write, file, buffer, length, ppos); |
657 | if (!!old_state == !!unknown_nmi_panic) | 1004 | if (!!old_state == !!nmi_watchdog_enabled) |
658 | return 0; | 1005 | return 0; |
659 | 1006 | ||
660 | if (unknown_nmi_panic) { | 1007 | if (atomic_read(&nmi_active) < 0) { |
661 | if (reserve_lapic_nmi() < 0) { | 1008 | printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); |
662 | unknown_nmi_panic = 0; | 1009 | return -EIO; |
663 | return -EBUSY; | 1010 | } |
664 | } else { | 1011 | |
665 | set_nmi_callback(unknown_nmi_panic_callback); | 1012 | if (nmi_watchdog == NMI_DEFAULT) { |
666 | } | 1013 | if (nmi_known_cpu() > 0) |
1014 | nmi_watchdog = NMI_LOCAL_APIC; | ||
1015 | else | ||
1016 | nmi_watchdog = NMI_IO_APIC; | ||
1017 | } | ||
1018 | |||
1019 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
1020 | if (nmi_watchdog_enabled) | ||
1021 | enable_lapic_nmi_watchdog(); | ||
1022 | else | ||
1023 | disable_lapic_nmi_watchdog(); | ||
667 | } else { | 1024 | } else { |
668 | release_lapic_nmi(); | 1025 | printk( KERN_WARNING |
669 | unset_nmi_callback(); | 1026 | "NMI watchdog doesn't know what hardware to touch\n"); |
1027 | return -EIO; | ||
670 | } | 1028 | } |
671 | return 0; | 1029 | return 0; |
672 | } | 1030 | } |
@@ -675,7 +1033,11 @@ int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, | |||
675 | 1033 | ||
676 | EXPORT_SYMBOL(nmi_active); | 1034 | EXPORT_SYMBOL(nmi_active); |
677 | EXPORT_SYMBOL(nmi_watchdog); | 1035 | EXPORT_SYMBOL(nmi_watchdog); |
678 | EXPORT_SYMBOL(reserve_lapic_nmi); | 1036 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); |
679 | EXPORT_SYMBOL(release_lapic_nmi); | 1037 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); |
1038 | EXPORT_SYMBOL(reserve_perfctr_nmi); | ||
1039 | EXPORT_SYMBOL(release_perfctr_nmi); | ||
1040 | EXPORT_SYMBOL(reserve_evntsel_nmi); | ||
1041 | EXPORT_SYMBOL(release_evntsel_nmi); | ||
680 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); | 1042 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); |
681 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); | 1043 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); |