Diffstat (limited to 'arch/i386/kernel/nmi.c')
-rw-r--r--  arch/i386/kernel/nmi.c  |  146
1 file changed, 102 insertions(+), 44 deletions(-)
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 821df34d2b3a..84c3497efb60 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -41,16 +41,17 @@ int nmi_watchdog_enabled;
  * different subsystems this reservation system just tries to coordinate
  * things a little
  */
-static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
-static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
-
-static cpumask_t backtrace_mask = CPU_MASK_NONE;
 
 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
  * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
  */
 #define NMI_MAX_COUNTER_BITS 66
+#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
+
+static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
+static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
 
+static cpumask_t backtrace_mask = CPU_MASK_NONE;
 /* nmi_active:
  * >0: the lapic NMI watchdog is active, but can be disabled
  * <0: the lapic NMI watchdog has not been set up, and cannot
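The hunk above replaces the single unsigned long owner word per CPU (and the hard-coded evntsel_nmi_owner[3]) with arrays sized from NMI_MAX_COUNTER_BITS, so both bitmaps are guaranteed to hold all 66 possible counter bits on any word size. The kernel's BITS_TO_LONGS(nr) rounds nr up to whole longs; a minimal userspace sketch of that sizing arithmetic (the includes and macro expansion here are illustrative stand-ins, not the kernel headers):

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG (sizeof(long) * CHAR_BIT)
/* The kernel's BITS_TO_LONGS(nr) is equivalent to this round-up division. */
#define BITS_TO_LONGS(nr) (((nr) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
	/* 66 counter bits need 3 longs on i386 (32-bit long), matching the
	 * old hard-coded [3]; a 64-bit build would need only 2. */
	printf("longs needed for 66 bits: %zu\n", (size_t)BITS_TO_LONGS(66));
	return 0;
}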
@@ -122,64 +123,129 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
 /* checks for a bit availability (hack for oprofile) */
 int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
 {
+	int cpu;
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
-	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+	for_each_possible_cpu (cpu) {
+		if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
+			return 0;
+	}
+	return 1;
 }
 
 /* checks the an msr for availability */
 int avail_to_resrv_perfctr_nmi(unsigned int msr)
 {
 	unsigned int counter;
+	int cpu;
 
 	counter = nmi_perfctr_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+	for_each_possible_cpu (cpu) {
+		if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
+			return 0;
+	}
+	return 1;
 }
 
-int reserve_perfctr_nmi(unsigned int msr)
+static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_perfctr_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
+	if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
 		return 1;
 	return 0;
 }
 
-void release_perfctr_nmi(unsigned int msr)
+static void __release_perfctr_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_perfctr_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
+	clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]);
 }
 
-int reserve_evntsel_nmi(unsigned int msr)
+int reserve_perfctr_nmi(unsigned int msr)
+{
+	int cpu, i;
+	for_each_possible_cpu (cpu) {
+		if (!__reserve_perfctr_nmi(cpu, msr)) {
+			for_each_possible_cpu (i) {
+				if (i >= cpu)
+					break;
+				__release_perfctr_nmi(i, msr);
+			}
+			return 0;
+		}
+	}
+	return 1;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+	int cpu;
+	for_each_possible_cpu (cpu) {
+		__release_perfctr_nmi(cpu, msr);
+	}
+}
+
+int __reserve_evntsel_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_evntsel_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
+	if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
 		return 1;
 	return 0;
 }
 
-void release_evntsel_nmi(unsigned int msr)
+static void __release_evntsel_nmi(int cpu, unsigned int msr)
 {
 	unsigned int counter;
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
 	counter = nmi_evntsel_msr_to_bit(msr);
 	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
 
-	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
+	clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+	int cpu, i;
+	for_each_possible_cpu (cpu) {
+		if (!__reserve_evntsel_nmi(cpu, msr)) {
+			for_each_possible_cpu (i) {
+				if (i >= cpu)
+					break;
+				__release_evntsel_nmi(i, msr);
+			}
+			return 0;
+		}
+	}
+	return 1;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+	int cpu;
+	for_each_possible_cpu (cpu) {
+		__release_evntsel_nmi(cpu, msr);
+	}
 }
 
 static __cpuinit inline int nmi_known_cpu(void)
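The hunk above splits each reservation into a per-CPU primitive (__reserve_*/__release_*, taking an explicit cpu, with -1 meaning "the CPU I am running on") and a machine-wide wrapper that claims the bit on every possible CPU, rolling back the CPUs already claimed if any one of them fails. A minimal userspace sketch of that all-or-nothing pattern; the names here (NCPUS, owner[], try_reserve) are illustrative, not kernel API:

#include <stdbool.h>

#define NCPUS 4				/* stand-in for the possible-CPU map */
static unsigned long owner[NCPUS];	/* one owner word per CPU, bit = counter */

static bool try_reserve(int cpu, unsigned int counter)
{
	unsigned long bit = 1UL << counter;
	if (owner[cpu] & bit)
		return false;		/* already claimed on this CPU */
	owner[cpu] |= bit;
	return true;
}

static void release(int cpu, unsigned int counter)
{
	owner[cpu] &= ~(1UL << counter);
}

/* Returns true only if the counter could be claimed on ALL CPUs;
 * otherwise undoes the partial claim so no CPU is left half-reserved. */
static bool reserve_on_all(unsigned int counter)
{
	int cpu, i;
	for (cpu = 0; cpu < NCPUS; cpu++) {
		if (!try_reserve(cpu, counter)) {
			for (i = 0; i < cpu; i++)
				release(i, counter);
			return false;
		}
	}
	return true;
}

This mirrors the for_each_possible_cpu()/i >= cpu rollback loops in reserve_perfctr_nmi() and reserve_evntsel_nmi() above: the claim either succeeds on every CPU or leaves no bit set anywhere.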
@@ -245,14 +311,6 @@ static int __init check_nmi_watchdog(void)
 	unsigned int *prev_nmi_count;
 	int cpu;
 
-	/* Enable NMI watchdog for newer systems.
-	   Probably safe on most older systems too, but let's be careful.
-	   IBM ThinkPads use INT10 inside SMM and that allows early NMI inside SMM
-	   which hangs the system. Disable watchdog for all thinkpads */
-	if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004 &&
-		!dmi_name_in_vendors("ThinkPad"))
-		nmi_watchdog = NMI_LOCAL_APIC;
-
 	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
 		return 0;
 
@@ -271,7 +329,7 @@ static int __init check_nmi_watchdog(void)
 	for_each_possible_cpu(cpu)
 		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
 	local_irq_enable();
-	mdelay((10*1000)/nmi_hz); // wait 10 ticks
+	mdelay((20*1000)/nmi_hz); // wait 20 ticks
 
 	for_each_possible_cpu(cpu) {
 #ifdef CONFIG_SMP
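The mdelay() above is sized in watchdog periods rather than fixed milliseconds: at nmi_hz ticks per second one tick lasts 1000/nmi_hz ms, so moving from (10*1000)/nmi_hz to (20*1000)/nmi_hz doubles the settling window from ten ticks to twenty. A worked example of the arithmetic, with an illustrative rate (the kernel's actual nmi_hz is set at runtime):

#include <stdio.h>

int main(void)
{
	unsigned int nmi_hz = 1000;	/* illustrative rate, not the live value */
	unsigned int delay_ms = (20 * 1000) / nmi_hz;

	/* 20 ms at 1000 Hz, 20000 ms at 1 Hz: always twenty ticks. */
	printf("%u ms covers 20 watchdog ticks at %u Hz\n", delay_ms, nmi_hz);
	return 0;
}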
@@ -515,10 +573,10 @@ static int setup_k7_watchdog(void)
 
 	perfctr_msr = MSR_K7_PERFCTR0;
 	evntsel_msr = MSR_K7_EVNTSEL0;
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	wrmsrl(perfctr_msr, 0UL);
@@ -541,7 +599,7 @@ static int setup_k7_watchdog(void)
 	wd->check_bit = 1ULL<<63;
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
 	return 0;
 }
@@ -552,8 +610,8 @@ static void stop_k7_watchdog(void)
 
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 #define P6_EVNTSEL0_ENABLE (1 << 22)
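setup_k7_watchdog() and stop_k7_watchdog() (and the p6/p4/arch-perfmon variants below) run on the CPU whose counters they program, so they pass cpu = -1 and let the helpers resolve it via smp_processor_id(); the machine-wide wrappers pass explicit CPU numbers instead. A small standalone sketch of that convention; current_cpu and resolve_cpu are illustrative stand-ins, only the cpu < 0 rule comes from the diff:

#include <assert.h>

static int current_cpu = 2;	/* pretend we were IPI'd onto CPU 2 */
static int smp_processor_id(void) { return current_cpu; }

static int resolve_cpu(int cpu)
{
	/* The convention above: a negative cpu selects the CPU the caller
	 * is currently running on, so per-CPU setup code can say "-1". */
	return (cpu < 0) ? smp_processor_id() : cpu;
}

int main(void)
{
	assert(resolve_cpu(-1) == 2);	/* watchdog setup/stop path */
	assert(resolve_cpu(0) == 0);	/* explicit CPU, e.g. rollback loop */
	return 0;
}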
@@ -571,10 +629,10 @@ static int setup_p6_watchdog(void)
 
 	perfctr_msr = MSR_P6_PERFCTR0;
 	evntsel_msr = MSR_P6_EVNTSEL0;
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	wrmsrl(perfctr_msr, 0UL);
@@ -598,7 +656,7 @@ static int setup_p6_watchdog(void)
 	wd->check_bit = 1ULL<<39;
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
 	return 0;
 }
@@ -609,8 +667,8 @@ static void stop_p6_watchdog(void)
 
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 /* Note that these events don't tick when the CPU idles. This means
@@ -676,10 +734,10 @@ static int setup_p4_watchdog(void)
 		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
 	}
 
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
@@ -703,7 +761,7 @@ static int setup_p4_watchdog(void)
 	wd->check_bit = 1ULL<<39;
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
 	return 0;
 }
@@ -715,8 +773,8 @@ static void stop_p4_watchdog(void)
 	wrmsr(wd->cccr_msr, 0, 0);
 	wrmsr(wd->evntsel_msr, 0, 0);
 
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
@@ -744,10 +802,10 @@ static int setup_intel_arch_watchdog(void)
 	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
 	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
 
-	if (!reserve_perfctr_nmi(perfctr_msr))
+	if (!__reserve_perfctr_nmi(-1, perfctr_msr))
 		goto fail;
 
-	if (!reserve_evntsel_nmi(evntsel_msr))
+	if (!__reserve_evntsel_nmi(-1, evntsel_msr))
 		goto fail1;
 
 	wrmsrl(perfctr_msr, 0UL);
@@ -772,7 +830,7 @@ static int setup_intel_arch_watchdog(void)
 	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
 	return 1;
 fail1:
-	release_perfctr_nmi(perfctr_msr);
+	__release_perfctr_nmi(-1, perfctr_msr);
 fail:
 	return 0;
 }
@@ -795,8 +853,8 @@ static void stop_intel_arch_watchdog(void)
 		return;
 
 	wrmsr(wd->evntsel_msr, 0, 0);
-	release_evntsel_nmi(wd->evntsel_msr);
-	release_perfctr_nmi(wd->perfctr_msr);
+	__release_evntsel_nmi(-1, wd->evntsel_msr);
+	__release_perfctr_nmi(-1, wd->perfctr_msr);
 }
 
 void setup_apic_nmi_watchdog (void *unused)
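For a subsystem that needs a counter on every CPU at once (the oprofile-style callers the availability checks mention), the exported wrappers now give an atomic claim across the whole machine. A sketch of such a caller; only the reserve/release calls and the MSR names are from this file, the driver shape, function names, and error handling are assumed:

/* Kernel-context sketch; would need <asm/msr.h> and <linux/errno.h>. */
static int my_driver_start(void)
{
	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
		return -EBUSY;		/* some CPU already owns the counter */
	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
		release_perfctr_nmi(MSR_K7_PERFCTR0);
		return -EBUSY;
	}
	/* ... program the event select and counter MSRs ... */
	return 0;
}

static void my_driver_stop(void)
{
	release_evntsel_nmi(MSR_K7_EVNTSEL0);
	release_perfctr_nmi(MSR_K7_PERFCTR0);
}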