aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/perfctr-watchdog.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/perfctr-watchdog.c')
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c210
1 files changed, 117 insertions, 93 deletions
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index f9ae93adffe5..6d4bdc02388a 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -1,11 +1,15 @@
1/* local apic based NMI watchdog for various CPUs. 1/*
2 This file also handles reservation of performance counters for coordination 2 * local apic based NMI watchdog for various CPUs.
3 with other users (like oprofile). 3 *
4 4 * This file also handles reservation of performance counters for coordination
5 Note that these events normally don't tick when the CPU idles. This means 5 * with other users (like oprofile).
6 the frequency varies with CPU load. 6 *
7 7 * Note that these events normally don't tick when the CPU idles. This means
8 Original code for K7/P6 written by Keith Owens */ 8 * the frequency varies with CPU load.
9 *
10 * Original code for K7/P6 written by Keith Owens
11 *
12 */
9 13
10#include <linux/percpu.h> 14#include <linux/percpu.h>
11#include <linux/module.h> 15#include <linux/module.h>
@@ -36,12 +40,16 @@ struct wd_ops {
36 40
37static const struct wd_ops *wd_ops; 41static const struct wd_ops *wd_ops;
38 42
39/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's 43/*
40 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) 44 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
45 * offset from MSR_P4_BSU_ESCR0.
46 *
47 * It will be the max for all platforms (for now)
41 */ 48 */
42#define NMI_MAX_COUNTER_BITS 66 49#define NMI_MAX_COUNTER_BITS 66
43 50
44/* perfctr_nmi_owner tracks the ownership of the perfctr registers: 51/*
52 * perfctr_nmi_owner tracks the ownership of the perfctr registers:
45 * evtsel_nmi_owner tracks the ownership of the event selection 53 * evtsel_nmi_owner tracks the ownership of the event selection
46 * - different performance counters/ event selection may be reserved for 54 * - different performance counters/ event selection may be reserved for
47 * different subsystems this reservation system just tries to coordinate 55 * different subsystems this reservation system just tries to coordinate
@@ -73,8 +81,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
73 return 0; 81 return 0;
74} 82}
75 83
76/* converts an msr to an appropriate reservation bit */ 84/*
77/* returns the bit offset of the event selection register */ 85 * converts an msr to an appropriate reservation bit
86 * returns the bit offset of the event selection register
87 */
78static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) 88static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
79{ 89{
80 /* returns the bit offset of the event selection register */ 90 /* returns the bit offset of the event selection register */
@@ -114,6 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr)
114 124
115 return (!test_bit(counter, perfctr_nmi_owner)); 125 return (!test_bit(counter, perfctr_nmi_owner));
116} 126}
127EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
117 128
118int reserve_perfctr_nmi(unsigned int msr) 129int reserve_perfctr_nmi(unsigned int msr)
119{ 130{
@@ -128,6 +139,7 @@ int reserve_perfctr_nmi(unsigned int msr)
128 return 1; 139 return 1;
129 return 0; 140 return 0;
130} 141}
142EXPORT_SYMBOL(reserve_perfctr_nmi);
131 143
132void release_perfctr_nmi(unsigned int msr) 144void release_perfctr_nmi(unsigned int msr)
133{ 145{
@@ -140,6 +152,7 @@ void release_perfctr_nmi(unsigned int msr)
140 152
141 clear_bit(counter, perfctr_nmi_owner); 153 clear_bit(counter, perfctr_nmi_owner);
142} 154}
155EXPORT_SYMBOL(release_perfctr_nmi);
143 156
144int reserve_evntsel_nmi(unsigned int msr) 157int reserve_evntsel_nmi(unsigned int msr)
145{ 158{
@@ -154,6 +167,7 @@ int reserve_evntsel_nmi(unsigned int msr)
154 return 1; 167 return 1;
155 return 0; 168 return 0;
156} 169}
170EXPORT_SYMBOL(reserve_evntsel_nmi);
157 171
158void release_evntsel_nmi(unsigned int msr) 172void release_evntsel_nmi(unsigned int msr)
159{ 173{
@@ -166,11 +180,6 @@ void release_evntsel_nmi(unsigned int msr)
166 180
167 clear_bit(counter, evntsel_nmi_owner); 181 clear_bit(counter, evntsel_nmi_owner);
168} 182}
169
170EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
171EXPORT_SYMBOL(reserve_perfctr_nmi);
172EXPORT_SYMBOL(release_perfctr_nmi);
173EXPORT_SYMBOL(reserve_evntsel_nmi);
174EXPORT_SYMBOL(release_evntsel_nmi); 183EXPORT_SYMBOL(release_evntsel_nmi);
175 184
176void disable_lapic_nmi_watchdog(void) 185void disable_lapic_nmi_watchdog(void)
@@ -180,8 +189,10 @@ void disable_lapic_nmi_watchdog(void)
180 if (atomic_read(&nmi_active) <= 0) 189 if (atomic_read(&nmi_active) <= 0)
181 return; 190 return;
182 191
183 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); 192 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
184 wd_ops->unreserve(); 193
194 if (wd_ops)
195 wd_ops->unreserve();
185 196
186 BUG_ON(atomic_read(&nmi_active) != 0); 197 BUG_ON(atomic_read(&nmi_active) != 0);
187} 198}
@@ -202,7 +213,7 @@ void enable_lapic_nmi_watchdog(void)
202 return; 213 return;
203 } 214 }
204 215
205 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); 216 on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
206 touch_nmi_watchdog(); 217 touch_nmi_watchdog();
207} 218}
208 219
@@ -232,8 +243,8 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz)
232 return retval; 243 return retval;
233} 244}
234 245
235static void 246static void write_watchdog_counter(unsigned int perfctr_msr,
236write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) 247 const char *descr, unsigned nmi_hz)
237{ 248{
238 u64 count = (u64)cpu_khz * 1000; 249 u64 count = (u64)cpu_khz * 1000;
239 250
@@ -244,7 +255,7 @@ write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi
244} 255}
245 256
246static void write_watchdog_counter32(unsigned int perfctr_msr, 257static void write_watchdog_counter32(unsigned int perfctr_msr,
247 const char *descr, unsigned nmi_hz) 258 const char *descr, unsigned nmi_hz)
248{ 259{
249 u64 count = (u64)cpu_khz * 1000; 260 u64 count = (u64)cpu_khz * 1000;
250 261
@@ -254,9 +265,10 @@ static void write_watchdog_counter32(unsigned int perfctr_msr,
254 wrmsr(perfctr_msr, (u32)(-count), 0); 265 wrmsr(perfctr_msr, (u32)(-count), 0);
255} 266}
256 267
257/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface 268/*
258 nicely stable so there is not much variety */ 269 * AMD K7/K8/Family10h/Family11h support.
259 270 * AMD keeps this interface nicely stable so there is not much variety
271 */
260#define K7_EVNTSEL_ENABLE (1 << 22) 272#define K7_EVNTSEL_ENABLE (1 << 22)
261#define K7_EVNTSEL_INT (1 << 20) 273#define K7_EVNTSEL_INT (1 << 20)
262#define K7_EVNTSEL_OS (1 << 17) 274#define K7_EVNTSEL_OS (1 << 17)
@@ -289,7 +301,7 @@ static int setup_k7_watchdog(unsigned nmi_hz)
289 301
290 wd->perfctr_msr = perfctr_msr; 302 wd->perfctr_msr = perfctr_msr;
291 wd->evntsel_msr = evntsel_msr; 303 wd->evntsel_msr = evntsel_msr;
292 wd->cccr_msr = 0; //unused 304 wd->cccr_msr = 0; /* unused */
293 return 1; 305 return 1;
294} 306}
295 307
@@ -325,18 +337,19 @@ static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
325} 337}
326 338
327static const struct wd_ops k7_wd_ops = { 339static const struct wd_ops k7_wd_ops = {
328 .reserve = single_msr_reserve, 340 .reserve = single_msr_reserve,
329 .unreserve = single_msr_unreserve, 341 .unreserve = single_msr_unreserve,
330 .setup = setup_k7_watchdog, 342 .setup = setup_k7_watchdog,
331 .rearm = single_msr_rearm, 343 .rearm = single_msr_rearm,
332 .stop = single_msr_stop_watchdog, 344 .stop = single_msr_stop_watchdog,
333 .perfctr = MSR_K7_PERFCTR0, 345 .perfctr = MSR_K7_PERFCTR0,
334 .evntsel = MSR_K7_EVNTSEL0, 346 .evntsel = MSR_K7_EVNTSEL0,
335 .checkbit = 1ULL<<47, 347 .checkbit = 1ULL << 47,
336}; 348};
337 349
338/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ 350/*
339 351 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
352 */
340#define P6_EVNTSEL0_ENABLE (1 << 22) 353#define P6_EVNTSEL0_ENABLE (1 << 22)
341#define P6_EVNTSEL_INT (1 << 20) 354#define P6_EVNTSEL_INT (1 << 20)
342#define P6_EVNTSEL_OS (1 << 17) 355#define P6_EVNTSEL_OS (1 << 17)
@@ -372,52 +385,58 @@ static int setup_p6_watchdog(unsigned nmi_hz)
372 385
373 wd->perfctr_msr = perfctr_msr; 386 wd->perfctr_msr = perfctr_msr;
374 wd->evntsel_msr = evntsel_msr; 387 wd->evntsel_msr = evntsel_msr;
375 wd->cccr_msr = 0; //unused 388 wd->cccr_msr = 0; /* unused */
376 return 1; 389 return 1;
377} 390}
378 391
379static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) 392static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
380{ 393{
381 /* P6 based Pentium M need to re-unmask 394 /*
395 * P6 based Pentium M need to re-unmask
382 * the apic vector but it doesn't hurt 396 * the apic vector but it doesn't hurt
383 * other P6 variant. 397 * other P6 variant.
384 * ArchPerfom/Core Duo also needs this */ 398 * ArchPerfom/Core Duo also needs this
399 */
385 apic_write(APIC_LVTPC, APIC_DM_NMI); 400 apic_write(APIC_LVTPC, APIC_DM_NMI);
401
386 /* P6/ARCH_PERFMON has 32 bit counter write */ 402 /* P6/ARCH_PERFMON has 32 bit counter write */
387 write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); 403 write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
388} 404}
389 405
390static const struct wd_ops p6_wd_ops = { 406static const struct wd_ops p6_wd_ops = {
391 .reserve = single_msr_reserve, 407 .reserve = single_msr_reserve,
392 .unreserve = single_msr_unreserve, 408 .unreserve = single_msr_unreserve,
393 .setup = setup_p6_watchdog, 409 .setup = setup_p6_watchdog,
394 .rearm = p6_rearm, 410 .rearm = p6_rearm,
395 .stop = single_msr_stop_watchdog, 411 .stop = single_msr_stop_watchdog,
396 .perfctr = MSR_P6_PERFCTR0, 412 .perfctr = MSR_P6_PERFCTR0,
397 .evntsel = MSR_P6_EVNTSEL0, 413 .evntsel = MSR_P6_EVNTSEL0,
398 .checkbit = 1ULL<<39, 414 .checkbit = 1ULL << 39,
399}; 415};
400 416
401/* Intel P4 performance counters. By far the most complicated of all. */ 417/*
402 418 * Intel P4 performance counters.
403#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) 419 * By far the most complicated of all.
404#define P4_ESCR_EVENT_SELECT(N) ((N)<<25) 420 */
405#define P4_ESCR_OS (1<<3) 421#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
406#define P4_ESCR_USR (1<<2) 422#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
407#define P4_CCCR_OVF_PMI0 (1<<26) 423#define P4_ESCR_OS (1 << 3)
408#define P4_CCCR_OVF_PMI1 (1<<27) 424#define P4_ESCR_USR (1 << 2)
409#define P4_CCCR_THRESHOLD(N) ((N)<<20) 425#define P4_CCCR_OVF_PMI0 (1 << 26)
410#define P4_CCCR_COMPLEMENT (1<<19) 426#define P4_CCCR_OVF_PMI1 (1 << 27)
411#define P4_CCCR_COMPARE (1<<18) 427#define P4_CCCR_THRESHOLD(N) ((N) << 20)
412#define P4_CCCR_REQUIRED (3<<16) 428#define P4_CCCR_COMPLEMENT (1 << 19)
413#define P4_CCCR_ESCR_SELECT(N) ((N)<<13) 429#define P4_CCCR_COMPARE (1 << 18)
414#define P4_CCCR_ENABLE (1<<12) 430#define P4_CCCR_REQUIRED (3 << 16)
415#define P4_CCCR_OVF (1<<31) 431#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
416 432#define P4_CCCR_ENABLE (1 << 12)
417/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter 433#define P4_CCCR_OVF (1 << 31)
418 CRU_ESCR0 (with any non-null event selector) through a complemented
419 max threshold. [IA32-Vol3, Section 14.9.9] */
420 434
435/*
436 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
437 * CRU_ESCR0 (with any non-null event selector) through a complemented
438 * max threshold. [IA32-Vol3, Section 14.9.9]
439 */
421static int setup_p4_watchdog(unsigned nmi_hz) 440static int setup_p4_watchdog(unsigned nmi_hz)
422{ 441{
423 unsigned int perfctr_msr, evntsel_msr, cccr_msr; 442 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
@@ -442,7 +461,8 @@ static int setup_p4_watchdog(unsigned nmi_hz)
442#endif 461#endif
443 ht_num = 0; 462 ht_num = 0;
444 463
445 /* performance counters are shared resources 464 /*
465 * performance counters are shared resources
446 * assign each hyperthread its own set 466 * assign each hyperthread its own set
447 * (re-use the ESCR0 register, seems safe 467 * (re-use the ESCR0 register, seems safe
448 * and keeps the cccr_val the same) 468 * and keeps the cccr_val the same)
@@ -540,20 +560,21 @@ static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
540} 560}
541 561
542static const struct wd_ops p4_wd_ops = { 562static const struct wd_ops p4_wd_ops = {
543 .reserve = p4_reserve, 563 .reserve = p4_reserve,
544 .unreserve = p4_unreserve, 564 .unreserve = p4_unreserve,
545 .setup = setup_p4_watchdog, 565 .setup = setup_p4_watchdog,
546 .rearm = p4_rearm, 566 .rearm = p4_rearm,
547 .stop = stop_p4_watchdog, 567 .stop = stop_p4_watchdog,
548 /* RED-PEN this is wrong for the other sibling */ 568 /* RED-PEN this is wrong for the other sibling */
549 .perfctr = MSR_P4_BPU_PERFCTR0, 569 .perfctr = MSR_P4_BPU_PERFCTR0,
550 .evntsel = MSR_P4_BSU_ESCR0, 570 .evntsel = MSR_P4_BSU_ESCR0,
551 .checkbit = 1ULL<<39, 571 .checkbit = 1ULL << 39,
552}; 572};
553 573
554/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully 574/*
555 all future Intel CPUs. */ 575 * Watchdog using the Intel architected PerfMon.
556 576 * Used for Core2 and hopefully all future Intel CPUs.
577 */
557#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 578#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
558#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK 579#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
559 580
@@ -599,19 +620,19 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
599 620
600 wd->perfctr_msr = perfctr_msr; 621 wd->perfctr_msr = perfctr_msr;
601 wd->evntsel_msr = evntsel_msr; 622 wd->evntsel_msr = evntsel_msr;
602 wd->cccr_msr = 0; //unused 623 wd->cccr_msr = 0; /* unused */
603 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); 624 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
604 return 1; 625 return 1;
605} 626}
606 627
607static struct wd_ops intel_arch_wd_ops __read_mostly = { 628static struct wd_ops intel_arch_wd_ops __read_mostly = {
608 .reserve = single_msr_reserve, 629 .reserve = single_msr_reserve,
609 .unreserve = single_msr_unreserve, 630 .unreserve = single_msr_unreserve,
610 .setup = setup_intel_arch_watchdog, 631 .setup = setup_intel_arch_watchdog,
611 .rearm = p6_rearm, 632 .rearm = p6_rearm,
612 .stop = single_msr_stop_watchdog, 633 .stop = single_msr_stop_watchdog,
613 .perfctr = MSR_ARCH_PERFMON_PERFCTR1, 634 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
614 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, 635 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
615}; 636};
616 637
617static void probe_nmi_watchdog(void) 638static void probe_nmi_watchdog(void)
@@ -624,8 +645,10 @@ static void probe_nmi_watchdog(void)
624 wd_ops = &k7_wd_ops; 645 wd_ops = &k7_wd_ops;
625 break; 646 break;
626 case X86_VENDOR_INTEL: 647 case X86_VENDOR_INTEL:
627 /* Work around Core Duo (Yonah) errata AE49 where perfctr1 648 /*
628 doesn't have a working enable bit. */ 649 * Work around Core Duo (Yonah) errata AE49 where perfctr1
650 * doesn't have a working enable bit.
651 */
629 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { 652 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
630 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; 653 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
631 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; 654 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
@@ -636,7 +659,7 @@ static void probe_nmi_watchdog(void)
636 } 659 }
637 switch (boot_cpu_data.x86) { 660 switch (boot_cpu_data.x86) {
638 case 6: 661 case 6:
639 if (boot_cpu_data.x86_model > 0xd) 662 if (boot_cpu_data.x86_model > 13)
640 return; 663 return;
641 664
642 wd_ops = &p6_wd_ops; 665 wd_ops = &p6_wd_ops;
@@ -697,10 +720,11 @@ int lapic_wd_event(unsigned nmi_hz)
697{ 720{
698 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 721 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
699 u64 ctr; 722 u64 ctr;
723
700 rdmsrl(wd->perfctr_msr, ctr); 724 rdmsrl(wd->perfctr_msr, ctr);
701 if (ctr & wd_ops->checkbit) { /* perfctr still running? */ 725 if (ctr & wd_ops->checkbit) /* perfctr still running? */
702 return 0; 726 return 0;
703 } 727
704 wd_ops->rearm(wd, nmi_hz); 728 wd_ops->rearm(wd, nmi_hz);
705 return 1; 729 return 1;
706} 730}