Diffstat (limited to 'arch/x86/kernel/cpu')
59 files changed, 6527 insertions, 2496 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index c202b62f3671..3f0ebe429a01 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -12,11 +12,11 @@ endif | |||
12 | nostackp := $(call cc-option, -fno-stack-protector) | 12 | nostackp := $(call cc-option, -fno-stack-protector) |
13 | CFLAGS_common.o := $(nostackp) | 13 | CFLAGS_common.o := $(nostackp) |
14 | 14 | ||
15 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 15 | obj-y := intel_cacheinfo.o scattered.o topology.o |
16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
17 | obj-y += vmware.o hypervisor.o sched.o | 17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o |
18 | 18 | ||
19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 19 | obj-$(CONFIG_X86_32) += bugs.o |
20 | obj-$(CONFIG_X86_64) += bugs_64.o | 20 | obj-$(CONFIG_X86_64) += bugs_64.o |
21 | 21 | ||
22 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o | 22 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e485825130d2..ba5f62f45f01 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -466,7 +466,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
466 | } | 466 | } |
467 | 467 | ||
468 | } | 468 | } |
469 | if (c->x86 == 0x10 || c->x86 == 0x11) | 469 | if (c->x86 >= 0x10) |
470 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 470 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
471 | 471 | ||
472 | /* get apicid instead of initial apic id from cpuid */ | 472 | /* get apicid instead of initial apic id from cpuid */ |
@@ -529,7 +529,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
529 | num_cache_leaves = 3; | 529 | num_cache_leaves = 3; |
530 | } | 530 | } |
531 | 531 | ||
532 | if (c->x86 >= 0xf && c->x86 <= 0x11) | 532 | if (c->x86 >= 0xf) |
533 | set_cpu_cap(c, X86_FEATURE_K8); | 533 | set_cpu_cap(c, X86_FEATURE_K8); |
534 | 534 | ||
535 | if (cpu_has_xmm2) { | 535 | if (cpu_has_xmm2) { |
@@ -546,7 +546,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
546 | fam10h_check_enable_mmcfg(); | 546 | fam10h_check_enable_mmcfg(); |
547 | } | 547 | } |
548 | 548 | ||
549 | if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | 549 | if (c == &boot_cpu_data && c->x86 >= 0xf) { |
550 | unsigned long long tseg; | 550 | unsigned long long tseg; |
551 | 551 | ||
552 | /* | 552 | /* |
@@ -609,3 +609,74 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { | |||
609 | }; | 609 | }; |
610 | 610 | ||
611 | cpu_dev_register(amd_cpu_dev); | 611 | cpu_dev_register(amd_cpu_dev); |
612 | |||
613 | /* | ||
614 | * AMD errata checking | ||
615 | * | ||
616 | * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or | ||
617 | * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that | ||
618 | * have an OSVW id assigned, which it takes as first argument. Both take a | ||
619 | * variable number of family-specific model-stepping ranges created by | ||
620 | * AMD_MODEL_RANGE(). Each erratum also has to be declared as extern const | ||
621 | * int[] in arch/x86/include/asm/processor.h. | ||
622 | * | ||
623 | * Example: | ||
624 | * | ||
625 | * const int amd_erratum_319[] = | ||
626 | * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), | ||
627 | * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), | ||
628 | * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0)); | ||
629 | */ | ||
630 | |||
631 | const int amd_erratum_400[] = | ||
632 | AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), | ||
633 | AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); | ||
634 | EXPORT_SYMBOL_GPL(amd_erratum_400); | ||
635 | |||
636 | const int amd_erratum_383[] = | ||
637 | AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); | ||
638 | EXPORT_SYMBOL_GPL(amd_erratum_383); | ||
639 | |||
640 | bool cpu_has_amd_erratum(const int *erratum) | ||
641 | { | ||
642 | struct cpuinfo_x86 *cpu = &current_cpu_data; | ||
643 | int osvw_id = *erratum++; | ||
644 | u32 range; | ||
645 | u32 ms; | ||
646 | |||
647 | /* | ||
648 | * If called early enough that current_cpu_data hasn't been initialized | ||
649 | * yet, fall back to boot_cpu_data. | ||
650 | */ | ||
651 | if (cpu->x86 == 0) | ||
652 | cpu = &boot_cpu_data; | ||
653 | |||
654 | if (cpu->x86_vendor != X86_VENDOR_AMD) | ||
655 | return false; | ||
656 | |||
657 | if (osvw_id >= 0 && osvw_id < 65536 && | ||
658 | cpu_has(cpu, X86_FEATURE_OSVW)) { | ||
659 | u64 osvw_len; | ||
660 | |||
661 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len); | ||
662 | if (osvw_id < osvw_len) { | ||
663 | u64 osvw_bits; | ||
664 | |||
665 | rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6), | ||
666 | osvw_bits); | ||
667 | return osvw_bits & (1ULL << (osvw_id & 0x3f)); | ||
668 | } | ||
669 | } | ||
670 | |||
671 | /* OSVW unavailable or ID unknown, match family-model-stepping range */ | ||
672 | ms = (cpu->x86_model << 4) | cpu->x86_mask; | ||
673 | while ((range = *erratum++)) | ||
674 | if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && | ||
675 | (ms >= AMD_MODEL_RANGE_START(range)) && | ||
676 | (ms <= AMD_MODEL_RANGE_END(range))) | ||
677 | return true; | ||
678 | |||
679 | return false; | ||
680 | } | ||
681 | |||
682 | EXPORT_SYMBOL_GPL(cpu_has_amd_erratum); | ||
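
For context (not part of the commit): cpu_has_amd_erratum() is meant to be called with one of the erratum tables defined above. A minimal sketch of a caller, assuming the extern declarations in arch/x86/include/asm/processor.h that the comment block describes; everything outside the diff above is an assumption:

	#include <asm/processor.h>

	static bool example_needs_e400_workaround(void)
	{
		/* Tries the OSVW MSRs first (if X86_FEATURE_OSVW is set),
		 * then falls back to the family/model/stepping ranges. */
		return cpu_has_amd_erratum(amd_erratum_400);
	}
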
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 01a265212395..c39576cb3018 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -86,7 +86,7 @@ static void __init check_fpu(void) | |||
86 | 86 | ||
87 | static void __init check_hlt(void) | 87 | static void __init check_hlt(void) |
88 | { | 88 | { |
89 | if (paravirt_enabled()) | 89 | if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) |
90 | return; | 90 | return; |
91 | 91 | ||
92 | printk(KERN_INFO "Checking 'hlt' instruction... "); | 92 | printk(KERN_INFO "Checking 'hlt' instruction... "); |
diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c
deleted file mode 100644
index 2056ccf572cc..000000000000
--- a/arch/x86/kernel/cpu/cmpxchg.c
+++ /dev/null
@@ -1,72 +0,0 @@ | |||
1 | /* | ||
2 | * cmpxchg*() fallbacks for CPU not supporting these instructions | ||
3 | */ | ||
4 | |||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/smp.h> | ||
7 | #include <linux/module.h> | ||
8 | |||
9 | #ifndef CONFIG_X86_CMPXCHG | ||
10 | unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) | ||
11 | { | ||
12 | u8 prev; | ||
13 | unsigned long flags; | ||
14 | |||
15 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
16 | local_irq_save(flags); | ||
17 | prev = *(u8 *)ptr; | ||
18 | if (prev == old) | ||
19 | *(u8 *)ptr = new; | ||
20 | local_irq_restore(flags); | ||
21 | return prev; | ||
22 | } | ||
23 | EXPORT_SYMBOL(cmpxchg_386_u8); | ||
24 | |||
25 | unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) | ||
26 | { | ||
27 | u16 prev; | ||
28 | unsigned long flags; | ||
29 | |||
30 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
31 | local_irq_save(flags); | ||
32 | prev = *(u16 *)ptr; | ||
33 | if (prev == old) | ||
34 | *(u16 *)ptr = new; | ||
35 | local_irq_restore(flags); | ||
36 | return prev; | ||
37 | } | ||
38 | EXPORT_SYMBOL(cmpxchg_386_u16); | ||
39 | |||
40 | unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) | ||
41 | { | ||
42 | u32 prev; | ||
43 | unsigned long flags; | ||
44 | |||
45 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
46 | local_irq_save(flags); | ||
47 | prev = *(u32 *)ptr; | ||
48 | if (prev == old) | ||
49 | *(u32 *)ptr = new; | ||
50 | local_irq_restore(flags); | ||
51 | return prev; | ||
52 | } | ||
53 | EXPORT_SYMBOL(cmpxchg_386_u32); | ||
54 | #endif | ||
55 | |||
56 | #ifndef CONFIG_X86_CMPXCHG64 | ||
57 | unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) | ||
58 | { | ||
59 | u64 prev; | ||
60 | unsigned long flags; | ||
61 | |||
62 | /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ | ||
63 | local_irq_save(flags); | ||
64 | prev = *(u64 *)ptr; | ||
65 | if (prev == old) | ||
66 | *(u64 *)ptr = new; | ||
67 | local_irq_restore(flags); | ||
68 | return prev; | ||
69 | } | ||
70 | EXPORT_SYMBOL(cmpxchg_486_u64); | ||
71 | #endif | ||
72 | |||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4868e4a951ee..f2f9ac7da25c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -140,10 +140,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | |||
140 | static int __init x86_xsave_setup(char *s) | 140 | static int __init x86_xsave_setup(char *s) |
141 | { | 141 | { |
142 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | 142 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); |
143 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
143 | return 1; | 144 | return 1; |
144 | } | 145 | } |
145 | __setup("noxsave", x86_xsave_setup); | 146 | __setup("noxsave", x86_xsave_setup); |
146 | 147 | ||
148 | static int __init x86_xsaveopt_setup(char *s) | ||
149 | { | ||
150 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
151 | return 1; | ||
152 | } | ||
153 | __setup("noxsaveopt", x86_xsaveopt_setup); | ||
154 | |||
147 | #ifdef CONFIG_X86_32 | 155 | #ifdef CONFIG_X86_32 |
148 | static int cachesize_override __cpuinitdata = -1; | 156 | static int cachesize_override __cpuinitdata = -1; |
149 | static int disable_x86_serial_nr __cpuinitdata = 1; | 157 | static int disable_x86_serial_nr __cpuinitdata = 1; |
@@ -537,7 +545,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) | |||
537 | } | 545 | } |
538 | } | 546 | } |
539 | 547 | ||
540 | static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | 548 | void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) |
541 | { | 549 | { |
542 | u32 tfms, xlvl; | 550 | u32 tfms, xlvl; |
543 | u32 ebx; | 551 | u32 ebx; |
@@ -551,6 +559,16 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |||
551 | c->x86_capability[4] = excap; | 559 | c->x86_capability[4] = excap; |
552 | } | 560 | } |
553 | 561 | ||
562 | /* Additional Intel-defined flags: level 0x00000007 */ | ||
563 | if (c->cpuid_level >= 0x00000007) { | ||
564 | u32 eax, ebx, ecx, edx; | ||
565 | |||
566 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); | ||
567 | |||
568 | if (eax > 0) | ||
569 | c->x86_capability[9] = ebx; | ||
570 | } | ||
571 | |||
554 | /* AMD-defined flags: level 0x80000001 */ | 572 | /* AMD-defined flags: level 0x80000001 */ |
555 | xlvl = cpuid_eax(0x80000000); | 573 | xlvl = cpuid_eax(0x80000000); |
556 | c->extended_cpuid_level = xlvl; | 574 | c->extended_cpuid_level = xlvl; |
@@ -576,6 +594,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |||
576 | if (c->extended_cpuid_level >= 0x80000007) | 594 | if (c->extended_cpuid_level >= 0x80000007) |
577 | c->x86_power = cpuid_edx(0x80000007); | 595 | c->x86_power = cpuid_edx(0x80000007); |
578 | 596 | ||
597 | init_scattered_cpuid_features(c); | ||
579 | } | 598 | } |
580 | 599 | ||
581 | static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) | 600 | static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
@@ -731,7 +750,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
731 | 750 | ||
732 | get_model_name(c); /* Default name */ | 751 | get_model_name(c); /* Default name */ |
733 | 752 | ||
734 | init_scattered_cpuid_features(c); | ||
735 | detect_nopl(c); | 753 | detect_nopl(c); |
736 | } | 754 | } |
737 | 755 | ||
@@ -1084,6 +1102,20 @@ static void clear_all_debug_regs(void) | |||
1084 | } | 1102 | } |
1085 | } | 1103 | } |
1086 | 1104 | ||
1105 | #ifdef CONFIG_KGDB | ||
1106 | /* | ||
1107 | * Restore debug regs if using kgdbwait and you have a kernel debugger | ||
1108 | * connection established. | ||
1109 | */ | ||
1110 | static void dbg_restore_debug_regs(void) | ||
1111 | { | ||
1112 | if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) | ||
1113 | arch_kgdb_ops.correct_hw_break(); | ||
1114 | } | ||
1115 | #else /* ! CONFIG_KGDB */ | ||
1116 | #define dbg_restore_debug_regs() | ||
1117 | #endif /* ! CONFIG_KGDB */ | ||
1118 | |||
1087 | /* | 1119 | /* |
1088 | * cpu_init() initializes state that is per-CPU. Some data is already | 1120 | * cpu_init() initializes state that is per-CPU. Some data is already |
1089 | * initialized (naturally) in the bootstrap process, such as the GDT | 1121 | * initialized (naturally) in the bootstrap process, such as the GDT |
@@ -1107,9 +1139,9 @@ void __cpuinit cpu_init(void) | |||
1107 | oist = &per_cpu(orig_ist, cpu); | 1139 | oist = &per_cpu(orig_ist, cpu); |
1108 | 1140 | ||
1109 | #ifdef CONFIG_NUMA | 1141 | #ifdef CONFIG_NUMA |
1110 | if (cpu != 0 && percpu_read(node_number) == 0 && | 1142 | if (cpu != 0 && percpu_read(numa_node) == 0 && |
1111 | cpu_to_node(cpu) != NUMA_NO_NODE) | 1143 | early_cpu_to_node(cpu) != NUMA_NO_NODE) |
1112 | percpu_write(node_number, cpu_to_node(cpu)); | 1144 | set_numa_node(early_cpu_to_node(cpu)); |
1113 | #endif | 1145 | #endif |
1114 | 1146 | ||
1115 | me = current; | 1147 | me = current; |
@@ -1174,20 +1206,11 @@ void __cpuinit cpu_init(void) | |||
1174 | load_TR_desc(); | 1206 | load_TR_desc(); |
1175 | load_LDT(&init_mm.context); | 1207 | load_LDT(&init_mm.context); |
1176 | 1208 | ||
1177 | #ifdef CONFIG_KGDB | 1209 | clear_all_debug_regs(); |
1178 | /* | 1210 | dbg_restore_debug_regs(); |
1179 | * If the kgdb is connected no debug regs should be altered. This | ||
1180 | * is only applicable when KGDB and a KGDB I/O module are built | ||
1181 | * into the kernel and you are using early debugging with | ||
1182 | * kgdbwait. KGDB will control the kernel HW breakpoint registers. | ||
1183 | */ | ||
1184 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | ||
1185 | arch_kgdb_ops.correct_hw_break(); | ||
1186 | else | ||
1187 | #endif | ||
1188 | clear_all_debug_regs(); | ||
1189 | 1211 | ||
1190 | fpu_init(); | 1212 | fpu_init(); |
1213 | xsave_init(); | ||
1191 | 1214 | ||
1192 | raw_local_save_flags(kernel_eflags); | 1215 | raw_local_save_flags(kernel_eflags); |
1193 | 1216 | ||
@@ -1239,23 +1262,16 @@ void __cpuinit cpu_init(void) | |||
1239 | #endif | 1262 | #endif |
1240 | 1263 | ||
1241 | clear_all_debug_regs(); | 1264 | clear_all_debug_regs(); |
1265 | dbg_restore_debug_regs(); | ||
1242 | 1266 | ||
1243 | /* | 1267 | /* |
1244 | * Force FPU initialization: | 1268 | * Force FPU initialization: |
1245 | */ | 1269 | */ |
1246 | if (cpu_has_xsave) | 1270 | current_thread_info()->status = 0; |
1247 | current_thread_info()->status = TS_XSAVE; | ||
1248 | else | ||
1249 | current_thread_info()->status = 0; | ||
1250 | clear_used_math(); | 1271 | clear_used_math(); |
1251 | mxcsr_feature_mask_init(); | 1272 | mxcsr_feature_mask_init(); |
1252 | 1273 | ||
1253 | /* | 1274 | fpu_init(); |
1254 | * Boot processor to setup the FP and extended state context info. | ||
1255 | */ | ||
1256 | if (smp_processor_id() == boot_cpu_id) | ||
1257 | init_thread_xstate(); | ||
1258 | |||
1259 | xsave_init(); | 1275 | xsave_init(); |
1260 | } | 1276 | } |
1261 | #endif | 1277 | #endif |
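
Note (not part of the commit): with the first hunk of this file, "noxsave" now clears both XSAVE and XSAVEOPT, while the new x86_xsaveopt_setup() handler lets "noxsaveopt" clear only XSAVEOPT. Illustrative boot entry, with boot-loader syntax and paths assumed:

	linux /boot/vmlinuz root=/dev/sda1 noxsaveopt
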
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 3624e8a0f71b..f668bb1f7d43 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -33,5 +33,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], | |||
33 | *const __x86_cpu_dev_end[]; | 33 | *const __x86_cpu_dev_end[]; |
34 | 34 | ||
35 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); | 35 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); |
36 | extern void get_cpu_cap(struct cpuinfo_x86 *c); | ||
36 | 37 | ||
37 | #endif | 38 | #endif |
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index f138c6c389b9..870e6cc6ad28 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -10,6 +10,20 @@ if CPU_FREQ | |||
10 | 10 | ||
11 | comment "CPUFreq processor drivers" | 11 | comment "CPUFreq processor drivers" |
12 | 12 | ||
13 | config X86_PCC_CPUFREQ | ||
14 | tristate "Processor Clocking Control interface driver" | ||
15 | depends on ACPI && ACPI_PROCESSOR | ||
16 | help | ||
17 | This driver adds support for the PCC interface. | ||
18 | |||
19 | For details, take a look at: | ||
20 | <file:Documentation/cpu-freq/pcc-cpufreq.txt>. | ||
21 | |||
22 | To compile this driver as a module, choose M here: the | ||
23 | module will be called pcc-cpufreq. | ||
24 | |||
25 | If in doubt, say N. | ||
26 | |||
13 | config X86_ACPI_CPUFREQ | 27 | config X86_ACPI_CPUFREQ |
14 | tristate "ACPI Processor P-States driver" | 28 | tristate "ACPI Processor P-States driver" |
15 | select CPU_FREQ_TABLE | 29 | select CPU_FREQ_TABLE |
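
Note (not part of the commit): per the help text above, the new driver builds as the pcc-cpufreq module. An assumed configuration fragment for trying it out, with the stated ACPI/ACPI_PROCESSOR dependencies already enabled:

	CONFIG_CPU_FREQ=y
	CONFIG_X86_PCC_CPUFREQ=m
	# afterwards (hypothetical session): modprobe pcc-cpufreq
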
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 509296df294d..bd54bf67e6fb 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -2,8 +2,9 @@ | |||
2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. | 2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. |
3 | # speedstep-* is preferred over p4-clockmod. | 3 | # speedstep-* is preferred over p4-clockmod. |
4 | 4 | ||
5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o | 5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o |
6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o | 6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o |
7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o | ||
7 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | 8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o |
8 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | 9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o |
9 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o | 10 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 1b1920fa7c80..cd8da247dda1 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,7 +33,7 @@ | |||
33 | #include <linux/cpufreq.h> | 33 | #include <linux/cpufreq.h> |
34 | #include <linux/compiler.h> | 34 | #include <linux/compiler.h> |
35 | #include <linux/dmi.h> | 35 | #include <linux/dmi.h> |
36 | #include <trace/events/power.h> | 36 | #include <linux/slab.h> |
37 | 37 | ||
38 | #include <linux/acpi.h> | 38 | #include <linux/acpi.h> |
39 | #include <linux/io.h> | 39 | #include <linux/io.h> |
@@ -45,6 +45,7 @@ | |||
45 | #include <asm/msr.h> | 45 | #include <asm/msr.h> |
46 | #include <asm/processor.h> | 46 | #include <asm/processor.h> |
47 | #include <asm/cpufeature.h> | 47 | #include <asm/cpufeature.h> |
48 | #include "mperf.h" | ||
48 | 49 | ||
49 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | 50 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ |
50 | "acpi-cpufreq", msg) | 51 | "acpi-cpufreq", msg) |
@@ -70,10 +71,8 @@ struct acpi_cpufreq_data { | |||
70 | 71 | ||
71 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); | 72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); |
72 | 73 | ||
73 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
74 | |||
75 | /* acpi_perf_data is a pointer to percpu data. */ | 74 | /* acpi_perf_data is a pointer to percpu data. */ |
76 | static struct acpi_processor_performance *acpi_perf_data; | 75 | static struct acpi_processor_performance __percpu *acpi_perf_data; |
77 | 76 | ||
78 | static struct cpufreq_driver acpi_cpufreq_driver; | 77 | static struct cpufreq_driver acpi_cpufreq_driver; |
79 | 78 | ||
@@ -239,45 +238,6 @@ static u32 get_cur_val(const struct cpumask *mask) | |||
239 | return cmd.val; | 238 | return cmd.val; |
240 | } | 239 | } |
241 | 240 | ||
242 | /* Called via smp_call_function_single(), on the target CPU */ | ||
243 | static void read_measured_perf_ctrs(void *_cur) | ||
244 | { | ||
245 | struct aperfmperf *am = _cur; | ||
246 | |||
247 | get_aperfmperf(am); | ||
248 | } | ||
249 | |||
250 | /* | ||
251 | * Return the measured active (C0) frequency on this CPU since last call | ||
252 | * to this function. | ||
253 | * Input: cpu number | ||
254 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
255 | * | ||
256 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
257 | * over a period of time, while CPU is in C0 state. | ||
258 | * IA32_MPERF counts at the rate of max advertised frequency | ||
259 | * IA32_APERF counts at the rate of actual CPU frequency | ||
260 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
261 | * no meaning should be associated with absolute values of these MSRs. | ||
262 | */ | ||
263 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, | ||
264 | unsigned int cpu) | ||
265 | { | ||
266 | struct aperfmperf perf; | ||
267 | unsigned long ratio; | ||
268 | unsigned int retval; | ||
269 | |||
270 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
271 | return 0; | ||
272 | |||
273 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
274 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
275 | |||
276 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
277 | |||
278 | return retval; | ||
279 | } | ||
280 | |||
281 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | 241 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) |
282 | { | 242 | { |
283 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); | 243 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); |
@@ -363,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
363 | } | 323 | } |
364 | } | 324 | } |
365 | 325 | ||
366 | trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency); | ||
367 | |||
368 | switch (data->cpu_feature) { | 326 | switch (data->cpu_feature) { |
369 | case SYSTEM_INTEL_MSR_CAPABLE: | 327 | case SYSTEM_INTEL_MSR_CAPABLE: |
370 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | 328 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; |
@@ -390,7 +348,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
390 | 348 | ||
391 | freqs.old = perf->states[perf->state].core_frequency * 1000; | 349 | freqs.old = perf->states[perf->state].core_frequency * 1000; |
392 | freqs.new = data->freq_table[next_state].frequency; | 350 | freqs.new = data->freq_table[next_state].frequency; |
393 | for_each_cpu(i, cmd.mask) { | 351 | for_each_cpu(i, policy->cpus) { |
394 | freqs.cpu = i; | 352 | freqs.cpu = i; |
395 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 353 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
396 | } | 354 | } |
@@ -406,7 +364,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
406 | } | 364 | } |
407 | } | 365 | } |
408 | 366 | ||
409 | for_each_cpu(i, cmd.mask) { | 367 | for_each_cpu(i, policy->cpus) { |
410 | freqs.cpu = i; | 368 | freqs.cpu = i; |
411 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 369 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
412 | } | 370 | } |
@@ -701,7 +659,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
701 | 659 | ||
702 | /* Check for APERF/MPERF support in hardware */ | 660 | /* Check for APERF/MPERF support in hardware */ |
703 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | 661 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) |
704 | acpi_cpufreq_driver.getavg = get_measured_perf; | 662 | acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf; |
705 | 663 | ||
706 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); | 664 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); |
707 | for (i = 0; i < perf->state_count; i++) | 665 | for (i = 0; i < perf->state_count; i++) |
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 006b278b0d5d..c587db472a75 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | 22 | ||
23 | #include <linux/slab.h> | ||
24 | #include <linux/delay.h> | 23 | #include <linux/delay.h> |
25 | #include <linux/cpufreq.h> | 24 | #include <linux/cpufreq.h> |
26 | 25 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index ac27ec2264d5..32974cf84232 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -80,6 +80,7 @@ | |||
80 | #include <linux/cpufreq.h> | 80 | #include <linux/cpufreq.h> |
81 | #include <linux/pci.h> | 81 | #include <linux/pci.h> |
82 | #include <linux/errno.h> | 82 | #include <linux/errno.h> |
83 | #include <linux/slab.h> | ||
83 | 84 | ||
84 | #include <asm/processor-cyrix.h> | 85 | #include <asm/processor-cyrix.h> |
85 | 86 | ||
@@ -168,12 +169,9 @@ static int gx_freq_mult[16] = { | |||
168 | * Low Level chipset interface * | 169 | * Low Level chipset interface * |
169 | ****************************************************************/ | 170 | ****************************************************************/ |
170 | static struct pci_device_id gx_chipset_tbl[] __initdata = { | 171 | static struct pci_device_id gx_chipset_tbl[] __initdata = { |
171 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, | 172 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), }, |
172 | PCI_ANY_ID, PCI_ANY_ID }, | 173 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), }, |
173 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, | 174 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), }, |
174 | PCI_ANY_ID, PCI_ANY_ID }, | ||
175 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, | ||
176 | PCI_ANY_ID, PCI_ANY_ID }, | ||
177 | { 0, }, | 175 | { 0, }, |
178 | }; | 176 | }; |
179 | 177 | ||
@@ -198,7 +196,7 @@ static __init struct pci_dev *gx_detect_chipset(void) | |||
198 | } | 196 | } |
199 | 197 | ||
200 | /* detect which companion chip is used */ | 198 | /* detect which companion chip is used */ |
201 | while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { | 199 | for_each_pci_dev(gx_pci) { |
202 | if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) | 200 | if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) |
203 | return gx_pci; | 201 | return gx_pci; |
204 | } | 202 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 7e7eea4f8261..03162dac6271 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -426,7 +426,7 @@ static int guess_fsb(int mult) | |||
426 | } | 426 | } |
427 | 427 | ||
428 | 428 | ||
429 | static int __init longhaul_get_ranges(void) | 429 | static int __cpuinit longhaul_get_ranges(void) |
430 | { | 430 | { |
431 | unsigned int i, j, k = 0; | 431 | unsigned int i, j, k = 0; |
432 | unsigned int ratio; | 432 | unsigned int ratio; |
@@ -530,7 +530,7 @@ static int __init longhaul_get_ranges(void) | |||
530 | } | 530 | } |
531 | 531 | ||
532 | 532 | ||
533 | static void __init longhaul_setup_voltagescaling(void) | 533 | static void __cpuinit longhaul_setup_voltagescaling(void) |
534 | { | 534 | { |
535 | union msr_longhaul longhaul; | 535 | union msr_longhaul longhaul; |
536 | struct mV_pos minvid, maxvid, vid; | 536 | struct mV_pos minvid, maxvid, vid; |
@@ -784,7 +784,7 @@ static int longhaul_setup_southbridge(void) | |||
784 | return 0; | 784 | return 0; |
785 | } | 785 | } |
786 | 786 | ||
787 | static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | 787 | static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy) |
788 | { | 788 | { |
789 | struct cpuinfo_x86 *c = &cpu_data(0); | 789 | struct cpuinfo_x86 *c = &cpu_data(0); |
790 | char *cpuname = NULL; | 790 | char *cpuname = NULL; |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h
index e2360a469f79..cbf48fbca881 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h
@@ -56,7 +56,7 @@ union msr_longhaul { | |||
56 | /* | 56 | /* |
57 | * VIA C3 Samuel 1 & Samuel 2 (stepping 0) | 57 | * VIA C3 Samuel 1 & Samuel 2 (stepping 0) |
58 | */ | 58 | */ |
59 | static const int __initdata samuel1_mults[16] = { | 59 | static const int __cpuinitdata samuel1_mults[16] = { |
60 | -1, /* 0000 -> RESERVED */ | 60 | -1, /* 0000 -> RESERVED */ |
61 | 30, /* 0001 -> 3.0x */ | 61 | 30, /* 0001 -> 3.0x */ |
62 | 40, /* 0010 -> 4.0x */ | 62 | 40, /* 0010 -> 4.0x */ |
@@ -75,7 +75,7 @@ static const int __initdata samuel1_mults[16] = { | |||
75 | -1, /* 1111 -> RESERVED */ | 75 | -1, /* 1111 -> RESERVED */ |
76 | }; | 76 | }; |
77 | 77 | ||
78 | static const int __initdata samuel1_eblcr[16] = { | 78 | static const int __cpuinitdata samuel1_eblcr[16] = { |
79 | 50, /* 0000 -> RESERVED */ | 79 | 50, /* 0000 -> RESERVED */ |
80 | 30, /* 0001 -> 3.0x */ | 80 | 30, /* 0001 -> 3.0x */ |
81 | 40, /* 0010 -> 4.0x */ | 81 | 40, /* 0010 -> 4.0x */ |
@@ -97,7 +97,7 @@ static const int __initdata samuel1_eblcr[16] = { | |||
97 | /* | 97 | /* |
98 | * VIA C3 Samuel2 Stepping 1->15 | 98 | * VIA C3 Samuel2 Stepping 1->15 |
99 | */ | 99 | */ |
100 | static const int __initdata samuel2_eblcr[16] = { | 100 | static const int __cpuinitdata samuel2_eblcr[16] = { |
101 | 50, /* 0000 -> 5.0x */ | 101 | 50, /* 0000 -> 5.0x */ |
102 | 30, /* 0001 -> 3.0x */ | 102 | 30, /* 0001 -> 3.0x */ |
103 | 40, /* 0010 -> 4.0x */ | 103 | 40, /* 0010 -> 4.0x */ |
@@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = { | |||
119 | /* | 119 | /* |
120 | * VIA C3 Ezra | 120 | * VIA C3 Ezra |
121 | */ | 121 | */ |
122 | static const int __initdata ezra_mults[16] = { | 122 | static const int __cpuinitdata ezra_mults[16] = { |
123 | 100, /* 0000 -> 10.0x */ | 123 | 100, /* 0000 -> 10.0x */ |
124 | 30, /* 0001 -> 3.0x */ | 124 | 30, /* 0001 -> 3.0x */ |
125 | 40, /* 0010 -> 4.0x */ | 125 | 40, /* 0010 -> 4.0x */ |
@@ -138,7 +138,7 @@ static const int __initdata ezra_mults[16] = { | |||
138 | 120, /* 1111 -> 12.0x */ | 138 | 120, /* 1111 -> 12.0x */ |
139 | }; | 139 | }; |
140 | 140 | ||
141 | static const int __initdata ezra_eblcr[16] = { | 141 | static const int __cpuinitdata ezra_eblcr[16] = { |
142 | 50, /* 0000 -> 5.0x */ | 142 | 50, /* 0000 -> 5.0x */ |
143 | 30, /* 0001 -> 3.0x */ | 143 | 30, /* 0001 -> 3.0x */ |
144 | 40, /* 0010 -> 4.0x */ | 144 | 40, /* 0010 -> 4.0x */ |
@@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = { | |||
160 | /* | 160 | /* |
161 | * VIA C3 (Ezra-T) [C5M]. | 161 | * VIA C3 (Ezra-T) [C5M]. |
162 | */ | 162 | */ |
163 | static const int __initdata ezrat_mults[32] = { | 163 | static const int __cpuinitdata ezrat_mults[32] = { |
164 | 100, /* 0000 -> 10.0x */ | 164 | 100, /* 0000 -> 10.0x */ |
165 | 30, /* 0001 -> 3.0x */ | 165 | 30, /* 0001 -> 3.0x */ |
166 | 40, /* 0010 -> 4.0x */ | 166 | 40, /* 0010 -> 4.0x */ |
@@ -196,7 +196,7 @@ static const int __initdata ezrat_mults[32] = { | |||
196 | -1, /* 1111 -> RESERVED (12.0x) */ | 196 | -1, /* 1111 -> RESERVED (12.0x) */ |
197 | }; | 197 | }; |
198 | 198 | ||
199 | static const int __initdata ezrat_eblcr[32] = { | 199 | static const int __cpuinitdata ezrat_eblcr[32] = { |
200 | 50, /* 0000 -> 5.0x */ | 200 | 50, /* 0000 -> 5.0x */ |
201 | 30, /* 0001 -> 3.0x */ | 201 | 30, /* 0001 -> 3.0x */ |
202 | 40, /* 0010 -> 4.0x */ | 202 | 40, /* 0010 -> 4.0x */ |
@@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = { | |||
235 | /* | 235 | /* |
236 | * VIA C3 Nehemiah */ | 236 | * VIA C3 Nehemiah */ |
237 | 237 | ||
238 | static const int __initdata nehemiah_mults[32] = { | 238 | static const int __cpuinitdata nehemiah_mults[32] = { |
239 | 100, /* 0000 -> 10.0x */ | 239 | 100, /* 0000 -> 10.0x */ |
240 | -1, /* 0001 -> 16.0x */ | 240 | -1, /* 0001 -> 16.0x */ |
241 | 40, /* 0010 -> 4.0x */ | 241 | 40, /* 0010 -> 4.0x */ |
@@ -270,7 +270,7 @@ static const int __initdata nehemiah_mults[32] = { | |||
270 | -1, /* 1111 -> 12.0x */ | 270 | -1, /* 1111 -> 12.0x */ |
271 | }; | 271 | }; |
272 | 272 | ||
273 | static const int __initdata nehemiah_eblcr[32] = { | 273 | static const int __cpuinitdata nehemiah_eblcr[32] = { |
274 | 50, /* 0000 -> 5.0x */ | 274 | 50, /* 0000 -> 5.0x */ |
275 | 160, /* 0001 -> 16.0x */ | 275 | 160, /* 0001 -> 16.0x */ |
276 | 40, /* 0010 -> 4.0x */ | 276 | 40, /* 0010 -> 4.0x */ |
@@ -315,7 +315,7 @@ struct mV_pos { | |||
315 | unsigned short pos; | 315 | unsigned short pos; |
316 | }; | 316 | }; |
317 | 317 | ||
318 | static const struct mV_pos __initdata vrm85_mV[32] = { | 318 | static const struct mV_pos __cpuinitdata vrm85_mV[32] = { |
319 | {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, | 319 | {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, |
320 | {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, | 320 | {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, |
321 | {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, | 321 | {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, |
@@ -326,14 +326,14 @@ static const struct mV_pos __initdata vrm85_mV[32] = { | |||
326 | {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} | 326 | {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} |
327 | }; | 327 | }; |
328 | 328 | ||
329 | static const unsigned char __initdata mV_vrm85[32] = { | 329 | static const unsigned char __cpuinitdata mV_vrm85[32] = { |
330 | 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, | 330 | 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, |
331 | 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, | 331 | 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, |
332 | 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, | 332 | 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, |
333 | 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 | 333 | 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 |
334 | }; | 334 | }; |
335 | 335 | ||
336 | static const struct mV_pos __initdata mobilevrm_mV[32] = { | 336 | static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = { |
337 | {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, | 337 | {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, |
338 | {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, | 338 | {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, |
339 | {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, | 339 | {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, |
@@ -344,7 +344,7 @@ static const struct mV_pos __initdata mobilevrm_mV[32] = { | |||
344 | {675, 3}, {650, 2}, {625, 1}, {600, 0} | 344 | {675, 3}, {650, 2}, {625, 1}, {600, 0} |
345 | }; | 345 | }; |
346 | 346 | ||
347 | static const unsigned char __initdata mV_mobilevrm[32] = { | 347 | static const unsigned char __cpuinitdata mV_mobilevrm[32] = { |
348 | 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, | 348 | 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, |
349 | 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, | 349 | 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, |
350 | 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, | 350 | 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, |
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index da5f70fcb766..fc09f142d94d 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/slab.h> | ||
13 | #include <linux/cpufreq.h> | 12 | #include <linux/cpufreq.h> |
14 | #include <linux/timex.h> | 13 | #include <linux/timex.h> |
15 | 14 | ||
@@ -166,8 +165,8 @@ static unsigned int longrun_get(unsigned int cpu) | |||
166 | * TMTA rules: | 165 | * TMTA rules: |
167 | * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) | 166 | * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) |
168 | */ | 167 | */ |
169 | static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, | 168 | static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq, |
170 | unsigned int *high_freq) | 169 | unsigned int *high_freq) |
171 | { | 170 | { |
172 | u32 msr_lo, msr_hi; | 171 | u32 msr_lo, msr_hi; |
173 | u32 save_lo, save_hi; | 172 | u32 save_lo, save_hi; |
@@ -259,7 +258,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, | |||
259 | } | 258 | } |
260 | 259 | ||
261 | 260 | ||
262 | static int __init longrun_cpu_init(struct cpufreq_policy *policy) | 261 | static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy) |
263 | { | 262 | { |
264 | int result = 0; | 263 | int result = 0; |
265 | 264 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c
new file mode 100644
index 000000000000..911e193018ae
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/mperf.c
@@ -0,0 +1,51 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/smp.h> | ||
3 | #include <linux/module.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/cpufreq.h> | ||
6 | #include <linux/slab.h> | ||
7 | |||
8 | #include "mperf.h" | ||
9 | |||
10 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
11 | |||
12 | /* Called via smp_call_function_single(), on the target CPU */ | ||
13 | static void read_measured_perf_ctrs(void *_cur) | ||
14 | { | ||
15 | struct aperfmperf *am = _cur; | ||
16 | |||
17 | get_aperfmperf(am); | ||
18 | } | ||
19 | |||
20 | /* | ||
21 | * Return the measured active (C0) frequency on this CPU since last call | ||
22 | * to this function. | ||
23 | * Input: cpu number | ||
24 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
25 | * | ||
26 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
27 | * over a period of time, while CPU is in C0 state. | ||
28 | * IA32_MPERF counts at the rate of max advertised frequency | ||
29 | * IA32_APERF counts at the rate of actual CPU frequency | ||
30 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
31 | * no meaning should be associated with absolute values of these MSRs. | ||
32 | */ | ||
33 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
34 | unsigned int cpu) | ||
35 | { | ||
36 | struct aperfmperf perf; | ||
37 | unsigned long ratio; | ||
38 | unsigned int retval; | ||
39 | |||
40 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
41 | return 0; | ||
42 | |||
43 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
44 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
45 | |||
46 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
47 | |||
48 | return retval; | ||
49 | } | ||
50 | EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf); | ||
51 | MODULE_LICENSE("GPL"); | ||
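
For context (not part of the commit): mperf.c factors the APERF/MPERF measurement out of acpi-cpufreq so any cpufreq driver can reuse it; the acpi-cpufreq hunk earlier in this diff now points its ->getavg hook here, and the Makefile change links mperf.o into powernow-k8 as well. A minimal sketch of how a driver opts in (the driver name is hypothetical):

	#include <linux/cpufreq.h>
	#include <asm/cpufeature.h>
	#include "mperf.h"

	static struct cpufreq_driver example_driver;	/* hypothetical driver */

	static void example_enable_getavg(struct cpuinfo_x86 *c)
	{
		/* Only meaningful where the APERF/MPERF MSRs are implemented. */
		if (cpu_has(c, X86_FEATURE_APERFMPERF))
			example_driver.getavg = cpufreq_get_measured_perf;
	}

Worked example of the ratio: on a CPU whose advertised maximum is 3000 MHz, if APERF advanced half as much as MPERF since the last call (i.e. the CPU ran at half speed while in C0), the helper reports roughly 3000 * 1/2 = 1500 MHz.
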
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h
new file mode 100644
index 000000000000..5dbf2950dc22
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/mperf.h
@@ -0,0 +1,9 @@ | |||
1 | /* | ||
2 | * (c) 2010 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | */ | ||
7 | |||
8 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
9 | unsigned int cpu); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 869615193720..bd1cac747f67 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -25,7 +25,6 @@ | |||
25 | #include <linux/init.h> | 25 | #include <linux/init.h> |
26 | #include <linux/smp.h> | 26 | #include <linux/smp.h> |
27 | #include <linux/cpufreq.h> | 27 | #include <linux/cpufreq.h> |
28 | #include <linux/slab.h> | ||
29 | #include <linux/cpumask.h> | 28 | #include <linux/cpumask.h> |
30 | #include <linux/timex.h> | 29 | #include <linux/timex.h> |
31 | 30 | ||
@@ -179,13 +178,8 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | |||
179 | } | 178 | } |
180 | } | 179 | } |
181 | 180 | ||
182 | if (c->x86 != 0xF) { | 181 | if (c->x86 != 0xF) |
183 | if (!cpu_has(c, X86_FEATURE_EST)) | ||
184 | printk(KERN_WARNING PFX "Unknown CPU. " | ||
185 | "Please send an e-mail to " | ||
186 | "<cpufreq@vger.kernel.org>\n"); | ||
187 | return 0; | 182 | return 0; |
188 | } | ||
189 | 183 | ||
190 | /* on P-4s, the TSC runs with constant frequency independent whether | 184 | /* on P-4s, the TSC runs with constant frequency independent whether |
191 | * throttling is active or not. */ | 185 | * throttling is active or not. */ |
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
new file mode 100644
index 000000000000..4f6f679f2799
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -0,0 +1,626 @@ | |||
1 | /* | ||
2 | * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface | ||
3 | * | ||
4 | * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com> | ||
5 | * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. | ||
6 | * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com> | ||
7 | * | ||
8 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; version 2 of the License. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, but | ||
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON | ||
17 | * INFRINGEMENT. See the GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
21 | * 675 Mass Ave, Cambridge, MA 02139, USA. | ||
22 | * | ||
23 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
24 | */ | ||
25 | |||
26 | #include <linux/kernel.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/smp.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/cpufreq.h> | ||
32 | #include <linux/compiler.h> | ||
33 | #include <linux/slab.h> | ||
34 | |||
35 | #include <linux/acpi.h> | ||
36 | #include <linux/io.h> | ||
37 | #include <linux/spinlock.h> | ||
38 | #include <linux/uaccess.h> | ||
39 | |||
40 | #include <acpi/processor.h> | ||
41 | |||
42 | #define PCC_VERSION "1.00.00" | ||
43 | #define POLL_LOOPS 300 | ||
44 | |||
45 | #define CMD_COMPLETE 0x1 | ||
46 | #define CMD_GET_FREQ 0x0 | ||
47 | #define CMD_SET_FREQ 0x1 | ||
48 | |||
49 | #define BUF_SZ 4 | ||
50 | |||
51 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
52 | "pcc-cpufreq", msg) | ||
53 | |||
54 | struct pcc_register_resource { | ||
55 | u8 descriptor; | ||
56 | u16 length; | ||
57 | u8 space_id; | ||
58 | u8 bit_width; | ||
59 | u8 bit_offset; | ||
60 | u8 access_size; | ||
61 | u64 address; | ||
62 | } __attribute__ ((packed)); | ||
63 | |||
64 | struct pcc_memory_resource { | ||
65 | u8 descriptor; | ||
66 | u16 length; | ||
67 | u8 space_id; | ||
68 | u8 resource_usage; | ||
69 | u8 type_specific; | ||
70 | u64 granularity; | ||
71 | u64 minimum; | ||
72 | u64 maximum; | ||
73 | u64 translation_offset; | ||
74 | u64 address_length; | ||
75 | } __attribute__ ((packed)); | ||
76 | |||
77 | static struct cpufreq_driver pcc_cpufreq_driver; | ||
78 | |||
79 | struct pcc_header { | ||
80 | u32 signature; | ||
81 | u16 length; | ||
82 | u8 major; | ||
83 | u8 minor; | ||
84 | u32 features; | ||
85 | u16 command; | ||
86 | u16 status; | ||
87 | u32 latency; | ||
88 | u32 minimum_time; | ||
89 | u32 maximum_time; | ||
90 | u32 nominal; | ||
91 | u32 throttled_frequency; | ||
92 | u32 minimum_frequency; | ||
93 | }; | ||
94 | |||
95 | static void __iomem *pcch_virt_addr; | ||
96 | static struct pcc_header __iomem *pcch_hdr; | ||
97 | |||
98 | static DEFINE_SPINLOCK(pcc_lock); | ||
99 | |||
100 | static struct acpi_generic_address doorbell; | ||
101 | |||
102 | static u64 doorbell_preserve; | ||
103 | static u64 doorbell_write; | ||
104 | |||
105 | static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f, | ||
106 | 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46}; | ||
107 | |||
108 | struct pcc_cpu { | ||
109 | u32 input_offset; | ||
110 | u32 output_offset; | ||
111 | }; | ||
112 | |||
113 | static struct pcc_cpu __percpu *pcc_cpu_info; | ||
114 | |||
115 | static int pcc_cpufreq_verify(struct cpufreq_policy *policy) | ||
116 | { | ||
117 | cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, | ||
118 | policy->cpuinfo.max_freq); | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static inline void pcc_cmd(void) | ||
123 | { | ||
124 | u64 doorbell_value; | ||
125 | int i; | ||
126 | |||
127 | acpi_read(&doorbell_value, &doorbell); | ||
128 | acpi_write((doorbell_value & doorbell_preserve) | doorbell_write, | ||
129 | &doorbell); | ||
130 | |||
131 | for (i = 0; i < POLL_LOOPS; i++) { | ||
132 | if (ioread16(&pcch_hdr->status) & CMD_COMPLETE) | ||
133 | break; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | static inline void pcc_clear_mapping(void) | ||
138 | { | ||
139 | if (pcch_virt_addr) | ||
140 | iounmap(pcch_virt_addr); | ||
141 | pcch_virt_addr = NULL; | ||
142 | } | ||
143 | |||
144 | static unsigned int pcc_get_freq(unsigned int cpu) | ||
145 | { | ||
146 | struct pcc_cpu *pcc_cpu_data; | ||
147 | unsigned int curr_freq; | ||
148 | unsigned int freq_limit; | ||
149 | u16 status; | ||
150 | u32 input_buffer; | ||
151 | u32 output_buffer; | ||
152 | |||
153 | spin_lock(&pcc_lock); | ||
154 | |||
155 | dprintk("get: get_freq for CPU %d\n", cpu); | ||
156 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
157 | |||
158 | input_buffer = 0x1; | ||
159 | iowrite32(input_buffer, | ||
160 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
161 | iowrite16(CMD_GET_FREQ, &pcch_hdr->command); | ||
162 | |||
163 | pcc_cmd(); | ||
164 | |||
165 | output_buffer = | ||
166 | ioread32(pcch_virt_addr + pcc_cpu_data->output_offset); | ||
167 | |||
168 | /* Clear the input buffer - we are done with the current command */ | ||
169 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
170 | |||
171 | status = ioread16(&pcch_hdr->status); | ||
172 | if (status != CMD_COMPLETE) { | ||
173 | dprintk("get: FAILED: for CPU %d, status is %d\n", | ||
174 | cpu, status); | ||
175 | goto cmd_incomplete; | ||
176 | } | ||
177 | iowrite16(0, &pcch_hdr->status); | ||
178 | curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff)) | ||
179 | / 100) * 1000); | ||
180 | |||
181 | dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is " | ||
182 | "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n", | ||
183 | cpu, (pcch_virt_addr + pcc_cpu_data->output_offset), | ||
184 | output_buffer, curr_freq); | ||
185 | |||
186 | freq_limit = (output_buffer >> 8) & 0xff; | ||
187 | if (freq_limit != 0xff) { | ||
188 | dprintk("get: frequency for cpu %d is being temporarily" | ||
189 | " capped at %d\n", cpu, curr_freq); | ||
190 | } | ||
191 | |||
192 | spin_unlock(&pcc_lock); | ||
193 | return curr_freq; | ||
194 | |||
195 | cmd_incomplete: | ||
196 | iowrite16(0, &pcch_hdr->status); | ||
197 | spin_unlock(&pcc_lock); | ||
198 | return -EINVAL; | ||
199 | } | ||
200 | |||
201 | static int pcc_cpufreq_target(struct cpufreq_policy *policy, | ||
202 | unsigned int target_freq, | ||
203 | unsigned int relation) | ||
204 | { | ||
205 | struct pcc_cpu *pcc_cpu_data; | ||
206 | struct cpufreq_freqs freqs; | ||
207 | u16 status; | ||
208 | u32 input_buffer; | ||
209 | int cpu; | ||
210 | |||
211 | spin_lock(&pcc_lock); | ||
212 | cpu = policy->cpu; | ||
213 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
214 | |||
215 | dprintk("target: CPU %d should go to target freq: %d " | ||
216 | "(virtual) input_offset is 0x%x\n", | ||
217 | cpu, target_freq, | ||
218 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
219 | |||
220 | freqs.new = target_freq; | ||
221 | freqs.cpu = cpu; | ||
222 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
223 | |||
224 | input_buffer = 0x1 | (((target_freq * 100) | ||
225 | / (ioread32(&pcch_hdr->nominal) * 1000)) << 8); | ||
226 | iowrite32(input_buffer, | ||
227 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
228 | iowrite16(CMD_SET_FREQ, &pcch_hdr->command); | ||
229 | |||
230 | pcc_cmd(); | ||
231 | |||
232 | /* Clear the input buffer - we are done with the current command */ | ||
233 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
234 | |||
235 | status = ioread16(&pcch_hdr->status); | ||
236 | if (status != CMD_COMPLETE) { | ||
237 | dprintk("target: FAILED for cpu %d, with status: 0x%x\n", | ||
238 | cpu, status); | ||
239 | goto cmd_incomplete; | ||
240 | } | ||
241 | iowrite16(0, &pcch_hdr->status); | ||
242 | |||
243 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
244 | dprintk("target: was SUCCESSFUL for cpu %d\n", cpu); | ||
245 | spin_unlock(&pcc_lock); | ||
246 | |||
247 | return 0; | ||
248 | |||
249 | cmd_incomplete: | ||
250 | iowrite16(0, &pcch_hdr->status); | ||
251 | spin_unlock(&pcc_lock); | ||
252 | return -EINVAL; | ||
253 | } | ||
254 | |||
255 | static int pcc_get_offset(int cpu) | ||
256 | { | ||
257 | acpi_status status; | ||
258 | struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
259 | union acpi_object *pccp, *offset; | ||
260 | struct pcc_cpu *pcc_cpu_data; | ||
261 | struct acpi_processor *pr; | ||
262 | int ret = 0; | ||
263 | |||
264 | pr = per_cpu(processors, cpu); | ||
265 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
266 | |||
267 | status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer); | ||
268 | if (ACPI_FAILURE(status)) | ||
269 | return -ENODEV; | ||
270 | |||
271 | pccp = buffer.pointer; | ||
272 | if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) { | ||
273 | ret = -ENODEV; | ||
274 | goto out_free; | ||
275 | }; | ||
276 | |||
277 | offset = &(pccp->package.elements[0]); | ||
278 | if (!offset || offset->type != ACPI_TYPE_INTEGER) { | ||
279 | ret = -ENODEV; | ||
280 | goto out_free; | ||
281 | } | ||
282 | |||
283 | pcc_cpu_data->input_offset = offset->integer.value; | ||
284 | |||
285 | offset = &(pccp->package.elements[1]); | ||
286 | if (!offset || offset->type != ACPI_TYPE_INTEGER) { | ||
287 | ret = -ENODEV; | ||
288 | goto out_free; | ||
289 | } | ||
290 | |||
291 | pcc_cpu_data->output_offset = offset->integer.value; | ||
292 | |||
293 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
294 | memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ); | ||
295 | |||
296 | dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data " | ||
297 | "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n", | ||
298 | cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset); | ||
299 | out_free: | ||
300 | kfree(buffer.pointer); | ||
301 | return ret; | ||
302 | } | ||
303 | |||
304 | static int __init pcc_cpufreq_do_osc(acpi_handle *handle) | ||
305 | { | ||
306 | acpi_status status; | ||
307 | struct acpi_object_list input; | ||
308 | struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
309 | union acpi_object in_params[4]; | ||
310 | union acpi_object *out_obj; | ||
311 | u32 capabilities[2]; | ||
312 | u32 errors; | ||
313 | u32 supported; | ||
314 | int ret = 0; | ||
315 | |||
316 | input.count = 4; | ||
317 | input.pointer = in_params; | ||
318 | input.count = 4; | ||
319 | input.pointer = in_params; | ||
320 | in_params[0].type = ACPI_TYPE_BUFFER; | ||
321 | in_params[0].buffer.length = 16; | ||
322 | in_params[0].buffer.pointer = OSC_UUID; | ||
323 | in_params[1].type = ACPI_TYPE_INTEGER; | ||
324 | in_params[1].integer.value = 1; | ||
325 | in_params[2].type = ACPI_TYPE_INTEGER; | ||
326 | in_params[2].integer.value = 2; | ||
327 | in_params[3].type = ACPI_TYPE_BUFFER; | ||
328 | in_params[3].buffer.length = 8; | ||
329 | in_params[3].buffer.pointer = (u8 *)&capabilities; | ||
330 | |||
331 | capabilities[0] = OSC_QUERY_ENABLE; | ||
332 | capabilities[1] = 0x1; | ||
333 | |||
334 | status = acpi_evaluate_object(*handle, "_OSC", &input, &output); | ||
335 | if (ACPI_FAILURE(status)) | ||
336 | return -ENODEV; | ||
337 | |||
338 | if (!output.length) | ||
339 | return -ENODEV; | ||
340 | |||
341 | out_obj = output.pointer; | ||
342 | if (out_obj->type != ACPI_TYPE_BUFFER) { | ||
343 | ret = -ENODEV; | ||
344 | goto out_free; | ||
345 | } | ||
346 | |||
347 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | ||
348 | if (errors) { | ||
349 | ret = -ENODEV; | ||
350 | goto out_free; | ||
351 | } | ||
352 | |||
353 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | ||
354 | if (!(supported & 0x1)) { | ||
355 | ret = -ENODEV; | ||
356 | goto out_free; | ||
357 | } | ||
358 | |||
359 | kfree(output.pointer); | ||
360 | capabilities[0] = 0x0; | ||
361 | capabilities[1] = 0x1; | ||
362 | |||
363 | status = acpi_evaluate_object(*handle, "_OSC", &input, &output); | ||
364 | if (ACPI_FAILURE(status)) | ||
365 | return -ENODEV; | ||
366 | |||
367 | if (!output.length) | ||
368 | return -ENODEV; | ||
369 | |||
370 | out_obj = output.pointer; | ||
371 | if (out_obj->type != ACPI_TYPE_BUFFER) { | ||
372 | ret = -ENODEV; | ||
373 | goto out_free; | ||
374 | } | ||
375 | |||
376 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | ||
377 | if (errors) { | ||
378 | ret = -ENODEV; | ||
379 | goto out_free; | ||
380 | } | ||
381 | |||
382 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | ||
383 | if (!(supported & 0x1)) { | ||
384 | ret = -ENODEV; | ||
385 | goto out_free; | ||
386 | } | ||
387 | |||
388 | out_free: | ||
389 | kfree(output.pointer); | ||
390 | return ret; | ||
391 | } | ||
392 | |||
393 | static int __init pcc_cpufreq_probe(void) | ||
394 | { | ||
395 | acpi_status status; | ||
396 | struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
397 | struct pcc_memory_resource *mem_resource; | ||
398 | struct pcc_register_resource *reg_resource; | ||
399 | union acpi_object *out_obj, *member; | ||
400 | acpi_handle handle, osc_handle, pcch_handle; | ||
401 | int ret = 0; | ||
402 | |||
403 | status = acpi_get_handle(NULL, "\\_SB", &handle); | ||
404 | if (ACPI_FAILURE(status)) | ||
405 | return -ENODEV; | ||
406 | |||
407 | status = acpi_get_handle(handle, "PCCH", &pcch_handle); | ||
408 | if (ACPI_FAILURE(status)) | ||
409 | return -ENODEV; | ||
410 | |||
411 | status = acpi_get_handle(handle, "_OSC", &osc_handle); | ||
412 | if (ACPI_SUCCESS(status)) { | ||
413 | ret = pcc_cpufreq_do_osc(&osc_handle); | ||
414 | if (ret) | ||
415 | dprintk("probe: _OSC evaluation did not succeed\n"); | ||
416 | /* Firmware's use of _OSC is optional */ | ||
417 | ret = 0; | ||
418 | } | ||
419 | |||
420 | status = acpi_evaluate_object(handle, "PCCH", NULL, &output); | ||
421 | if (ACPI_FAILURE(status)) | ||
422 | return -ENODEV; | ||
423 | |||
424 | out_obj = output.pointer; | ||
425 | if (out_obj->type != ACPI_TYPE_PACKAGE) { | ||
426 | ret = -ENODEV; | ||
427 | goto out_free; | ||
428 | } | ||
429 | |||
430 | member = &out_obj->package.elements[0]; | ||
431 | if (member->type != ACPI_TYPE_BUFFER) { | ||
432 | ret = -ENODEV; | ||
433 | goto out_free; | ||
434 | } | ||
435 | |||
436 | mem_resource = (struct pcc_memory_resource *)member->buffer.pointer; | ||
437 | |||
438 | dprintk("probe: mem_resource descriptor: 0x%x," | ||
439 | " length: %d, space_id: %d, resource_usage: %d," | ||
440 | " type_specific: %d, granularity: 0x%llx," | ||
441 | " minimum: 0x%llx, maximum: 0x%llx," | ||
442 | " translation_offset: 0x%llx, address_length: 0x%llx\n", | ||
443 | mem_resource->descriptor, mem_resource->length, | ||
444 | mem_resource->space_id, mem_resource->resource_usage, | ||
445 | mem_resource->type_specific, mem_resource->granularity, | ||
446 | mem_resource->minimum, mem_resource->maximum, | ||
447 | mem_resource->translation_offset, | ||
448 | mem_resource->address_length); | ||
449 | |||
450 | if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) { | ||
451 | ret = -ENODEV; | ||
452 | goto out_free; | ||
453 | } | ||
454 | |||
455 | pcch_virt_addr = ioremap_nocache(mem_resource->minimum, | ||
456 | mem_resource->address_length); | ||
457 | if (pcch_virt_addr == NULL) { | ||
458 | dprintk("probe: could not map shared mem region\n"); | ||
459 | goto out_free; | ||
460 | } | ||
461 | pcch_hdr = pcch_virt_addr; | ||
462 | |||
463 | dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr); | ||
464 | dprintk("probe: PCCH header is at physical address: 0x%llx," | ||
465 | " signature: 0x%x, length: %d bytes, major: %d, minor: %d," | ||
466 | " supported features: 0x%x, command field: 0x%x," | ||
467 | " status field: 0x%x, nominal latency: %d us\n", | ||
468 | mem_resource->minimum, ioread32(&pcch_hdr->signature), | ||
469 | ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major), | ||
470 | ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features), | ||
471 | ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status), | ||
472 | ioread32(&pcch_hdr->latency)); | ||
473 | |||
474 | dprintk("probe: min time between commands: %d us," | ||
475 | " max time between commands: %d us," | ||
476 | " nominal CPU frequency: %d MHz," | ||
477 | " minimum CPU frequency: %d MHz," | ||
478 | " minimum CPU frequency without throttling: %d MHz\n", | ||
479 | ioread32(&pcch_hdr->minimum_time), | ||
480 | ioread32(&pcch_hdr->maximum_time), | ||
481 | ioread32(&pcch_hdr->nominal), | ||
482 | ioread32(&pcch_hdr->throttled_frequency), | ||
483 | ioread32(&pcch_hdr->minimum_frequency)); | ||
484 | |||
485 | member = &out_obj->package.elements[1]; | ||
486 | if (member->type != ACPI_TYPE_BUFFER) { | ||
487 | ret = -ENODEV; | ||
488 | goto pcch_free; | ||
489 | } | ||
490 | |||
491 | reg_resource = (struct pcc_register_resource *)member->buffer.pointer; | ||
492 | |||
493 | doorbell.space_id = reg_resource->space_id; | ||
494 | doorbell.bit_width = reg_resource->bit_width; | ||
495 | doorbell.bit_offset = reg_resource->bit_offset; | ||
496 | doorbell.access_width = 64; | ||
497 | doorbell.address = reg_resource->address; | ||
498 | |||
499 | dprintk("probe: doorbell: space_id is %d, bit_width is %d, " | ||
500 | "bit_offset is %d, access_width is %d, address is 0x%llx\n", | ||
501 | doorbell.space_id, doorbell.bit_width, doorbell.bit_offset, | ||
502 | doorbell.access_width, reg_resource->address); | ||
503 | |||
504 | member = &out_obj->package.elements[2]; | ||
505 | if (member->type != ACPI_TYPE_INTEGER) { | ||
506 | ret = -ENODEV; | ||
507 | goto pcch_free; | ||
508 | } | ||
509 | |||
510 | doorbell_preserve = member->integer.value; | ||
511 | |||
512 | member = &out_obj->package.elements[3]; | ||
513 | if (member->type != ACPI_TYPE_INTEGER) { | ||
514 | ret = -ENODEV; | ||
515 | goto pcch_free; | ||
516 | } | ||
517 | |||
518 | doorbell_write = member->integer.value; | ||
519 | |||
520 | dprintk("probe: doorbell_preserve: 0x%llx," | ||
521 | " doorbell_write: 0x%llx\n", | ||
522 | doorbell_preserve, doorbell_write); | ||
523 | |||
524 | pcc_cpu_info = alloc_percpu(struct pcc_cpu); | ||
525 | if (!pcc_cpu_info) { | ||
526 | ret = -ENOMEM; | ||
527 | goto pcch_free; | ||
528 | } | ||
529 | |||
530 | printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency" | ||
531 | " limits: %d MHz, %d MHz\n", PCC_VERSION, | ||
532 | ioread32(&pcch_hdr->minimum_frequency), | ||
533 | ioread32(&pcch_hdr->nominal)); | ||
534 | kfree(output.pointer); | ||
535 | return ret; | ||
536 | pcch_free: | ||
537 | pcc_clear_mapping(); | ||
538 | out_free: | ||
539 | kfree(output.pointer); | ||
540 | return ret; | ||
541 | } | ||
542 | |||
543 | static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) | ||
544 | { | ||
545 | unsigned int cpu = policy->cpu; | ||
546 | unsigned int result = 0; | ||
547 | |||
548 | if (!pcch_virt_addr) { | ||
549 | result = -1; | ||
550 | goto out; | ||
551 | } | ||
552 | |||
553 | result = pcc_get_offset(cpu); | ||
554 | if (result) { | ||
555 | dprintk("init: PCCP evaluation failed\n"); | ||
556 | goto out; | ||
557 | } | ||
558 | |||
559 | policy->max = policy->cpuinfo.max_freq = | ||
560 | ioread32(&pcch_hdr->nominal) * 1000; | ||
561 | policy->min = policy->cpuinfo.min_freq = | ||
562 | ioread32(&pcch_hdr->minimum_frequency) * 1000; | ||
563 | policy->cur = pcc_get_freq(cpu); | ||
564 | |||
565 | if (!policy->cur) { | ||
566 | dprintk("init: Unable to get current CPU frequency\n"); | ||
567 | result = -EINVAL; | ||
568 | goto out; | ||
569 | } | ||
570 | |||
571 | dprintk("init: policy->max is %d, policy->min is %d\n", | ||
572 | policy->max, policy->min); | ||
573 | out: | ||
574 | return result; | ||
575 | } | ||
576 | |||
577 | static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy) | ||
578 | { | ||
579 | return 0; | ||
580 | } | ||
581 | |||
582 | static struct cpufreq_driver pcc_cpufreq_driver = { | ||
583 | .flags = CPUFREQ_CONST_LOOPS, | ||
584 | .get = pcc_get_freq, | ||
585 | .verify = pcc_cpufreq_verify, | ||
586 | .target = pcc_cpufreq_target, | ||
587 | .init = pcc_cpufreq_cpu_init, | ||
588 | .exit = pcc_cpufreq_cpu_exit, | ||
589 | .name = "pcc-cpufreq", | ||
590 | .owner = THIS_MODULE, | ||
591 | }; | ||
592 | |||
593 | static int __init pcc_cpufreq_init(void) | ||
594 | { | ||
595 | int ret; | ||
596 | |||
597 | if (acpi_disabled) | ||
598 | return 0; | ||
599 | |||
600 | ret = pcc_cpufreq_probe(); | ||
601 | if (ret) { | ||
602 | dprintk("pcc_cpufreq_init: PCCH evaluation failed\n"); | ||
603 | return ret; | ||
604 | } | ||
605 | |||
606 | ret = cpufreq_register_driver(&pcc_cpufreq_driver); | ||
607 | |||
608 | return ret; | ||
609 | } | ||
610 | |||
611 | static void __exit pcc_cpufreq_exit(void) | ||
612 | { | ||
613 | cpufreq_unregister_driver(&pcc_cpufreq_driver); | ||
614 | |||
615 | pcc_clear_mapping(); | ||
616 | |||
617 | free_percpu(pcc_cpu_info); | ||
618 | } | ||
619 | |||
620 | MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar"); | ||
621 | MODULE_VERSION(PCC_VERSION); | ||
622 | MODULE_DESCRIPTION("Processor Clocking Control interface driver"); | ||
623 | MODULE_LICENSE("GPL"); | ||
624 | |||
625 | late_initcall(pcc_cpufreq_init); | ||
626 | module_exit(pcc_cpufreq_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index cb01dac267d3..b3379d6a5c57 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/cpufreq.h> | 14 | #include <linux/cpufreq.h> |
15 | #include <linux/ioport.h> | 15 | #include <linux/ioport.h> |
16 | #include <linux/slab.h> | ||
17 | #include <linux/timex.h> | 16 | #include <linux/timex.h> |
18 | #include <linux/io.h> | 17 | #include <linux/io.h> |
19 | 18 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 9a97116f89e5..4a45fd6e41ba 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c | |||
@@ -569,7 +569,7 @@ static int powernow_verify(struct cpufreq_policy *policy) | |||
569 | * We will then get the same kind of behaviour already tested under | 569 | * We will then get the same kind of behaviour already tested under |
570 | * the "well-known" other OS. | 570 | * the "well-known" other OS. |
571 | */ | 571 | */ |
572 | static int __init fixup_sgtc(void) | 572 | static int __cpuinit fixup_sgtc(void) |
573 | { | 573 | { |
574 | unsigned int sgtc; | 574 | unsigned int sgtc; |
575 | unsigned int m; | 575 | unsigned int m; |
@@ -603,7 +603,7 @@ static unsigned int powernow_get(unsigned int cpu) | |||
603 | } | 603 | } |
604 | 604 | ||
605 | 605 | ||
606 | static int __init acer_cpufreq_pst(const struct dmi_system_id *d) | 606 | static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d) |
607 | { | 607 | { |
608 | printk(KERN_WARNING PFX | 608 | printk(KERN_WARNING PFX |
609 | "%s laptop with broken PST tables in BIOS detected.\n", | 609 | "%s laptop with broken PST tables in BIOS detected.\n", |
@@ -621,7 +621,7 @@ static int __init acer_cpufreq_pst(const struct dmi_system_id *d) | |||
621 | * A BIOS update is all that can save them. | 621 | * A BIOS update is all that can save them. |
622 | * Mention this, and disable cpufreq. | 622 | * Mention this, and disable cpufreq. |
623 | */ | 623 | */ |
624 | static struct dmi_system_id __initdata powernow_dmi_table[] = { | 624 | static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = { |
625 | { | 625 | { |
626 | .callback = acer_cpufreq_pst, | 626 | .callback = acer_cpufreq_pst, |
627 | .ident = "Acer Aspire", | 627 | .ident = "Acer Aspire", |
@@ -633,7 +633,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = { | |||
633 | { } | 633 | { } |
634 | }; | 634 | }; |
635 | 635 | ||
636 | static int __init powernow_cpu_init(struct cpufreq_policy *policy) | 636 | static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy) |
637 | { | 637 | { |
638 | union msr_fidvidstatus fidvidstatus; | 638 | union msr_fidvidstatus fidvidstatus; |
639 | int result; | 639 | int result; |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index f125e5c551c0..491977baf6c0 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -1,6 +1,5 @@ | |||
1 | |||
2 | /* | 1 | /* |
3 | * (c) 2003-2006 Advanced Micro Devices, Inc. | 2 | * (c) 2003-2010 Advanced Micro Devices, Inc. |
4 | * Your use of this code is subject to the terms and conditions of the | 3 | * Your use of this code is subject to the terms and conditions of the |
5 | * GNU general public license version 2. See "COPYING" or | 4 | * GNU general public license version 2. See "COPYING" or |
6 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
@@ -10,7 +9,7 @@ | |||
10 | * Based on the powernow-k7.c module written by Dave Jones. | 9 | * Based on the powernow-k7.c module written by Dave Jones. |
11 | * (C) 2003 Dave Jones on behalf of SuSE Labs | 10 | * (C) 2003 Dave Jones on behalf of SuSE Labs |
12 | * (C) 2004 Dominik Brodowski <linux@brodo.de> | 11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> |
13 | * (C) 2004 Pavel Machek <pavel@suse.cz> | 12 | * (C) 2004 Pavel Machek <pavel@ucw.cz> |
14 | * Licensed under the terms of the GNU GPL License version 2. | 13 | * Licensed under the terms of the GNU GPL License version 2. |
15 | * Based upon datasheets & sample CPUs kindly provided by AMD. | 14 | * Based upon datasheets & sample CPUs kindly provided by AMD. |
16 | * | 15 | * |
@@ -46,6 +45,7 @@ | |||
46 | #define PFX "powernow-k8: " | 45 | #define PFX "powernow-k8: " |
47 | #define VERSION "version 2.20.00" | 46 | #define VERSION "version 2.20.00" |
48 | #include "powernow-k8.h" | 47 | #include "powernow-k8.h" |
48 | #include "mperf.h" | ||
49 | 49 | ||
50 | /* serialize freq changes */ | 50 | /* serialize freq changes */ |
51 | static DEFINE_MUTEX(fidvid_mutex); | 51 | static DEFINE_MUTEX(fidvid_mutex); |
@@ -54,6 +54,12 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); | |||
54 | 54 | ||
55 | static int cpu_family = CPU_OPTERON; | 55 | static int cpu_family = CPU_OPTERON; |
56 | 56 | ||
57 | /* core performance boost */ | ||
58 | static bool cpb_capable, cpb_enabled; | ||
59 | static struct msr __percpu *msrs; | ||
60 | |||
61 | static struct cpufreq_driver cpufreq_amd64_driver; | ||
62 | |||
57 | #ifndef CONFIG_SMP | 63 | #ifndef CONFIG_SMP |
58 | static inline const struct cpumask *cpu_core_mask(int cpu) | 64 | static inline const struct cpumask *cpu_core_mask(int cpu) |
59 | { | 65 | { |
@@ -800,13 +806,15 @@ static int find_psb_table(struct powernow_k8_data *data) | |||
800 | * www.amd.com | 806 | * www.amd.com |
801 | */ | 807 | */ |
802 | printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); | 808 | printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); |
809 | printk(KERN_ERR PFX "Make sure that your BIOS is up to date" | ||
810 | " and Cool'N'Quiet support is enabled in BIOS setup\n"); | ||
803 | return -ENODEV; | 811 | return -ENODEV; |
804 | } | 812 | } |
805 | 813 | ||
806 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, | 814 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, |
807 | unsigned int index) | 815 | unsigned int index) |
808 | { | 816 | { |
809 | acpi_integer control; | 817 | u64 control; |
810 | 818 | ||
811 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) | 819 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) |
812 | return; | 820 | return; |
@@ -824,7 +832,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
824 | { | 832 | { |
825 | struct cpufreq_frequency_table *powernow_table; | 833 | struct cpufreq_frequency_table *powernow_table; |
826 | int ret_val = -ENODEV; | 834 | int ret_val = -ENODEV; |
827 | acpi_integer control, status; | 835 | u64 control, status; |
828 | 836 | ||
829 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { | 837 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { |
830 | dprintk("register performance failed: bad ACPI data\n"); | 838 | dprintk("register performance failed: bad ACPI data\n"); |
@@ -904,8 +912,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, | |||
904 | { | 912 | { |
905 | int i; | 913 | int i; |
906 | u32 hi = 0, lo = 0; | 914 | u32 hi = 0, lo = 0; |
907 | rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); | 915 | rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi); |
908 | data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; | 916 | data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; |
909 | 917 | ||
910 | for (i = 0; i < data->acpi_data.state_count; i++) { | 918 | for (i = 0; i < data->acpi_data.state_count; i++) { |
911 | u32 index; | 919 | u32 index; |
@@ -929,7 +937,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, | |||
929 | powernow_table[i].index = index; | 937 | powernow_table[i].index = index; |
930 | 938 | ||
931 | /* Frequency may be rounded for these */ | 939 | /* Frequency may be rounded for these */ |
932 | if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) { | 940 | if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10) |
941 | || boot_cpu_data.x86 == 0x11) { | ||
933 | powernow_table[i].frequency = | 942 | powernow_table[i].frequency = |
934 | freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); | 943 | freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); |
935 | } else | 944 | } else |
@@ -948,7 +957,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | |||
948 | u32 fid; | 957 | u32 fid; |
949 | u32 vid; | 958 | u32 vid; |
950 | u32 freq, index; | 959 | u32 freq, index; |
951 | acpi_integer status, control; | 960 | u64 status, control; |
952 | 961 | ||
953 | if (data->exttype) { | 962 | if (data->exttype) { |
954 | status = data->acpi_data.states[i].status; | 963 | status = data->acpi_data.states[i].status; |
@@ -1016,13 +1025,12 @@ static int get_transition_latency(struct powernow_k8_data *data) | |||
1016 | } | 1025 | } |
1017 | if (max_latency == 0) { | 1026 | if (max_latency == 0) { |
1018 | /* | 1027 | /* |
1019 | * Fam 11h always returns 0 as transition latency. | 1028 | * Fam 11h and later may return 0 as transition latency. This |
1020 | * This is intended and means "very fast". While cpufreq core | 1029 | * is intended and means "very fast". While cpufreq core and |
1021 | * and governors currently can handle that gracefully, better | 1030 | * governors currently can handle that gracefully, better set it |
1022 | * set it to 1 to avoid problems in the future. | 1031 | * to 1 to avoid problems in the future. |
1023 | * For all others it's a BIOS bug. | ||
1024 | */ | 1032 | */ |
1025 | if (boot_cpu_data.x86 != 0x11) | 1033 | if (boot_cpu_data.x86 < 0x11) |
1026 | printk(KERN_ERR FW_WARN PFX "Invalid zero transition " | 1034 | printk(KERN_ERR FW_WARN PFX "Invalid zero transition " |
1027 | "latency\n"); | 1035 | "latency\n"); |
1028 | max_latency = 1; | 1036 | max_latency = 1; |
@@ -1248,6 +1256,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1248 | struct powernow_k8_data *data; | 1256 | struct powernow_k8_data *data; |
1249 | struct init_on_cpu init_on_cpu; | 1257 | struct init_on_cpu init_on_cpu; |
1250 | int rc; | 1258 | int rc; |
1259 | struct cpuinfo_x86 *c = &cpu_data(pol->cpu); | ||
1251 | 1260 | ||
1252 | if (!cpu_online(pol->cpu)) | 1261 | if (!cpu_online(pol->cpu)) |
1253 | return -ENODEV; | 1262 | return -ENODEV; |
@@ -1322,6 +1331,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1322 | return -EINVAL; | 1331 | return -EINVAL; |
1323 | } | 1332 | } |
1324 | 1333 | ||
1334 | /* Check for APERF/MPERF support in hardware */ | ||
1335 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | ||
1336 | cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf; | ||
1337 | |||
1325 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); | 1338 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); |
1326 | 1339 | ||
1327 | if (cpu_family == CPU_HW_PSTATE) | 1340 | if (cpu_family == CPU_HW_PSTATE) |
@@ -1356,6 +1369,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) | |||
1356 | 1369 | ||
1357 | kfree(data->powernow_table); | 1370 | kfree(data->powernow_table); |
1358 | kfree(data); | 1371 | kfree(data); |
1372 | per_cpu(powernow_data, pol->cpu) = NULL; | ||
1359 | 1373 | ||
1360 | return 0; | 1374 | return 0; |
1361 | } | 1375 | } |
@@ -1375,7 +1389,7 @@ static unsigned int powernowk8_get(unsigned int cpu) | |||
1375 | int err; | 1389 | int err; |
1376 | 1390 | ||
1377 | if (!data) | 1391 | if (!data) |
1378 | return -EINVAL; | 1392 | return 0; |
1379 | 1393 | ||
1380 | smp_call_function_single(cpu, query_values_on_cpu, &err, true); | 1394 | smp_call_function_single(cpu, query_values_on_cpu, &err, true); |
1381 | if (err) | 1395 | if (err) |
@@ -1392,8 +1406,77 @@ out: | |||
1392 | return khz; | 1406 | return khz; |
1393 | } | 1407 | } |
1394 | 1408 | ||
1409 | static void _cpb_toggle_msrs(bool t) | ||
1410 | { | ||
1411 | int cpu; | ||
1412 | |||
1413 | get_online_cpus(); | ||
1414 | |||
1415 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1416 | |||
1417 | for_each_cpu(cpu, cpu_online_mask) { | ||
1418 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
1419 | if (t) | ||
1420 | reg->l &= ~BIT(25); | ||
1421 | else | ||
1422 | reg->l |= BIT(25); | ||
1423 | } | ||
1424 | wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1425 | |||
1426 | put_online_cpus(); | ||
1427 | } | ||
1428 | |||
1429 | /* | ||
1430 | * Switch on/off core performance boosting. | ||
1431 | * | ||
1432 | * 0=disable | ||
1433 | * 1=enable. | ||
1434 | */ | ||
1435 | static void cpb_toggle(bool t) | ||
1436 | { | ||
1437 | if (!cpb_capable) | ||
1438 | return; | ||
1439 | |||
1440 | if (t && !cpb_enabled) { | ||
1441 | cpb_enabled = true; | ||
1442 | _cpb_toggle_msrs(t); | ||
1443 | printk(KERN_INFO PFX "Core Boosting enabled.\n"); | ||
1444 | } else if (!t && cpb_enabled) { | ||
1445 | cpb_enabled = false; | ||
1446 | _cpb_toggle_msrs(t); | ||
1447 | printk(KERN_INFO PFX "Core Boosting disabled.\n"); | ||
1448 | } | ||
1449 | } | ||
1450 | |||
1451 | static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, | ||
1452 | size_t count) | ||
1453 | { | ||
1454 | int ret = -EINVAL; | ||
1455 | unsigned long val = 0; | ||
1456 | |||
1457 | ret = strict_strtoul(buf, 10, &val); | ||
1458 | if (!ret && (val == 0 || val == 1) && cpb_capable) | ||
1459 | cpb_toggle(val); | ||
1460 | else | ||
1461 | return -EINVAL; | ||
1462 | |||
1463 | return count; | ||
1464 | } | ||
1465 | |||
1466 | static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) | ||
1467 | { | ||
1468 | return sprintf(buf, "%u\n", cpb_enabled); | ||
1469 | } | ||
1470 | |||
1471 | #define define_one_rw(_name) \ | ||
1472 | static struct freq_attr _name = \ | ||
1473 | __ATTR(_name, 0644, show_##_name, store_##_name) | ||
1474 | |||
1475 | define_one_rw(cpb); | ||
1476 | |||
1395 | static struct freq_attr *powernow_k8_attr[] = { | 1477 | static struct freq_attr *powernow_k8_attr[] = { |
1396 | &cpufreq_freq_attr_scaling_available_freqs, | 1478 | &cpufreq_freq_attr_scaling_available_freqs, |
1479 | &cpb, | ||
1397 | NULL, | 1480 | NULL, |
1398 | }; | 1481 | }; |
1399 | 1482 | ||
@@ -1409,10 +1492,51 @@ static struct cpufreq_driver cpufreq_amd64_driver = { | |||
1409 | .attr = powernow_k8_attr, | 1492 | .attr = powernow_k8_attr, |
1410 | }; | 1493 | }; |
1411 | 1494 | ||
1495 | /* | ||
1496 | * Clear the boost-disable flag on the CPU_DOWN path so that this cpu | ||
1497 | * cannot block the remaining ones from boosting. On the CPU_UP path we | ||
1498 | * simply keep the boost-disable flag in sync with the current global | ||
1499 | * state. | ||
1500 | */ | ||
1501 | static int cpb_notify(struct notifier_block *nb, unsigned long action, | ||
1502 | void *hcpu) | ||
1503 | { | ||
1504 | unsigned cpu = (long)hcpu; | ||
1505 | u32 lo, hi; | ||
1506 | |||
1507 | switch (action) { | ||
1508 | case CPU_UP_PREPARE: | ||
1509 | case CPU_UP_PREPARE_FROZEN: | ||
1510 | |||
1511 | if (!cpb_enabled) { | ||
1512 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
1513 | lo |= BIT(25); | ||
1514 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
1515 | } | ||
1516 | break; | ||
1517 | |||
1518 | case CPU_DOWN_PREPARE: | ||
1519 | case CPU_DOWN_PREPARE_FROZEN: | ||
1520 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
1521 | lo &= ~BIT(25); | ||
1522 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
1523 | break; | ||
1524 | |||
1525 | default: | ||
1526 | break; | ||
1527 | } | ||
1528 | |||
1529 | return NOTIFY_OK; | ||
1530 | } | ||
1531 | |||
1532 | static struct notifier_block cpb_nb = { | ||
1533 | .notifier_call = cpb_notify, | ||
1534 | }; | ||
1535 | |||
1412 | /* driver entry point for init */ | 1536 | /* driver entry point for init */ |
1413 | static int __cpuinit powernowk8_init(void) | 1537 | static int __cpuinit powernowk8_init(void) |
1414 | { | 1538 | { |
1415 | unsigned int i, supported_cpus = 0; | 1539 | unsigned int i, supported_cpus = 0, cpu; |
1416 | 1540 | ||
1417 | for_each_online_cpu(i) { | 1541 | for_each_online_cpu(i) { |
1418 | int rc; | 1542 | int rc; |
@@ -1421,15 +1545,36 @@ static int __cpuinit powernowk8_init(void) | |||
1421 | supported_cpus++; | 1545 | supported_cpus++; |
1422 | } | 1546 | } |
1423 | 1547 | ||
1424 | if (supported_cpus == num_online_cpus()) { | 1548 | if (supported_cpus != num_online_cpus()) |
1425 | printk(KERN_INFO PFX "Found %d %s " | 1549 | return -ENODEV; |
1426 | "processors (%d cpu cores) (" VERSION ")\n", | 1550 | |
1427 | num_online_nodes(), | 1551 | printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", |
1428 | boot_cpu_data.x86_model_id, supported_cpus); | 1552 | num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); |
1429 | return cpufreq_register_driver(&cpufreq_amd64_driver); | 1553 | |
1554 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
1555 | |||
1556 | cpb_capable = true; | ||
1557 | |||
1558 | register_cpu_notifier(&cpb_nb); | ||
1559 | |||
1560 | msrs = msrs_alloc(); | ||
1561 | if (!msrs) { | ||
1562 | printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); | ||
1563 | return -ENOMEM; | ||
1564 | } | ||
1565 | |||
1566 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1567 | |||
1568 | for_each_cpu(cpu, cpu_online_mask) { | ||
1569 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
1570 | cpb_enabled |= !(!!(reg->l & BIT(25))); | ||
1571 | } | ||
1572 | |||
1573 | printk(KERN_INFO PFX "Core Performance Boosting: %s.\n", | ||
1574 | (cpb_enabled ? "on" : "off")); | ||
1430 | } | 1575 | } |
1431 | 1576 | ||
1432 | return -ENODEV; | 1577 | return cpufreq_register_driver(&cpufreq_amd64_driver); |
1433 | } | 1578 | } |
1434 | 1579 | ||
1435 | /* driver entry point for term */ | 1580 | /* driver entry point for term */ |
@@ -1437,6 +1582,13 @@ static void __exit powernowk8_exit(void) | |||
1437 | { | 1582 | { |
1438 | dprintk("exit\n"); | 1583 | dprintk("exit\n"); |
1439 | 1584 | ||
1585 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
1586 | msrs_free(msrs); | ||
1587 | msrs = NULL; | ||
1588 | |||
1589 | unregister_cpu_notifier(&cpb_nb); | ||
1590 | } | ||
1591 | |||
1440 | cpufreq_unregister_driver(&cpufreq_amd64_driver); | 1592 | cpufreq_unregister_driver(&cpufreq_amd64_driver); |
1441 | } | 1593 | } |
1442 | 1594 | ||
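The core performance boost support added above treats bit 25 of MSR_K7_HWCR as a boost-disable bit: _cpb_toggle_msrs() and cpb_notify() clear it to allow boosting and set it to forbid it, and the new "cpb" freq attribute exposes the global 0/1 switch through each CPU's cpufreq sysfs directory. A single-CPU sketch of the same MSR manipulation follows; HWCR_CPB_DIS_BIT is an illustrative name, not one the driver defines.

/*
 * Hedged sketch mirroring the cpb_notify()/_cpb_toggle_msrs() logic for
 * one CPU: clearing HWCR bit 25 enables Core Performance Boost, setting
 * it disables boosting.
 */
#include <linux/bitops.h>
#include <linux/types.h>
#include <asm/msr.h>

#define HWCR_CPB_DIS_BIT	25	/* illustrative name */

static void toggle_core_boost(unsigned int cpu, bool enable)
{
	u32 lo, hi;

	rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi);

	if (enable)
		lo &= ~BIT(HWCR_CPB_DIS_BIT);
	else
		lo |= BIT(HWCR_CPB_DIS_BIT);

	wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi);
}

Note that store_cpb() only accepts "0" or "1" and refuses the write when the package is not boost-capable, so the sysfs knob cannot force boosting on hardware that lacks X86_FEATURE_CPB.
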
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 02ce824073cb..df3529b1c02d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h | |||
@@ -5,7 +5,6 @@ | |||
5 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
6 | */ | 6 | */ |
7 | 7 | ||
8 | |||
9 | enum pstate { | 8 | enum pstate { |
10 | HW_PSTATE_INVALID = 0xff, | 9 | HW_PSTATE_INVALID = 0xff, |
11 | HW_PSTATE_0 = 0, | 10 | HW_PSTATE_0 = 0, |
@@ -55,7 +54,6 @@ struct powernow_k8_data { | |||
55 | struct cpumask *available_cores; | 54 | struct cpumask *available_cores; |
56 | }; | 55 | }; |
57 | 56 | ||
58 | |||
59 | /* processor's cpuid instruction support */ | 57 | /* processor's cpuid instruction support */ |
60 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ | 58 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ |
61 | #define CPUID_XFAM 0x0ff00000 /* extended family */ | 59 | #define CPUID_XFAM 0x0ff00000 /* extended family */ |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 8d672ef162ce..9b1ff37de46a 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/sched.h> /* current */ | 20 | #include <linux/sched.h> /* current */ |
21 | #include <linux/delay.h> | 21 | #include <linux/delay.h> |
22 | #include <linux/compiler.h> | 22 | #include <linux/compiler.h> |
23 | #include <linux/gfp.h> | ||
23 | 24 | ||
24 | #include <asm/msr.h> | 25 | #include <asm/msr.h> |
25 | #include <asm/processor.h> | 26 | #include <asm/processor.h> |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 2ce8e0b5cc54..561758e95180 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/cpufreq.h> | 24 | #include <linux/cpufreq.h> |
25 | #include <linux/pci.h> | 25 | #include <linux/pci.h> |
26 | #include <linux/slab.h> | ||
27 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
28 | 27 | ||
29 | #include "speedstep-lib.h" | 28 | #include "speedstep-lib.h" |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index ad0083abfa23..a94ec6be69fa 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/moduleparam.h> | 13 | #include <linux/moduleparam.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/cpufreq.h> | 15 | #include <linux/cpufreq.h> |
16 | #include <linux/slab.h> | ||
17 | 16 | ||
18 | #include <asm/msr.h> | 17 | #include <asm/msr.h> |
19 | #include <asm/tsc.h> | 18 | #include <asm/tsc.h> |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index 04d73c114e49..8abd869baabf 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/moduleparam.h> | 17 | #include <linux/moduleparam.h> |
18 | #include <linux/init.h> | 18 | #include <linux/init.h> |
19 | #include <linux/cpufreq.h> | 19 | #include <linux/cpufreq.h> |
20 | #include <linux/slab.h> | ||
21 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
22 | #include <linux/io.h> | 21 | #include <linux/io.h> |
23 | #include <asm/ist.h> | 22 | #include <asm/ist.h> |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 08be922de33a..8095f8611f8a 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -21,37 +21,58 @@ | |||
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/module.h> | ||
24 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
25 | #include <asm/vmware.h> | ||
26 | #include <asm/hypervisor.h> | 26 | #include <asm/hypervisor.h> |
27 | 27 | ||
28 | static inline void __cpuinit | 28 | /* |
29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) | 29 | * Hypervisor detect order. This is specified explicitly here because |
30 | * some hypervisors might implement compatibility modes for other | ||
31 | * hypervisors and therefore need to be detected in specific sequence. | ||
32 | */ | ||
33 | static const __initconst struct hypervisor_x86 * const hypervisors[] = | ||
30 | { | 34 | { |
31 | if (vmware_platform()) | 35 | &x86_hyper_vmware, |
32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; | 36 | &x86_hyper_ms_hyperv, |
33 | else | 37 | #ifdef CONFIG_XEN_PVHVM |
34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | 38 | &x86_hyper_xen_hvm, |
35 | } | 39 | #endif |
40 | }; | ||
36 | 41 | ||
37 | static inline void __cpuinit | 42 | const struct hypervisor_x86 *x86_hyper; |
38 | hypervisor_set_feature_bits(struct cpuinfo_x86 *c) | 43 | EXPORT_SYMBOL(x86_hyper); |
44 | |||
45 | static inline void __init | ||
46 | detect_hypervisor_vendor(void) | ||
39 | { | 47 | { |
40 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) { | 48 | const struct hypervisor_x86 *h, * const *p; |
41 | vmware_set_feature_bits(c); | 49 | |
42 | return; | 50 | for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { |
51 | h = *p; | ||
52 | if (h->detect()) { | ||
53 | x86_hyper = h; | ||
54 | printk(KERN_INFO "Hypervisor detected: %s\n", h->name); | ||
55 | break; | ||
56 | } | ||
43 | } | 57 | } |
44 | } | 58 | } |
45 | 59 | ||
46 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) | 60 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) |
47 | { | 61 | { |
48 | detect_hypervisor_vendor(c); | 62 | if (x86_hyper && x86_hyper->set_cpu_features) |
49 | hypervisor_set_feature_bits(c); | 63 | x86_hyper->set_cpu_features(c); |
50 | } | 64 | } |
51 | 65 | ||
52 | void __init init_hypervisor_platform(void) | 66 | void __init init_hypervisor_platform(void) |
53 | { | 67 | { |
68 | |||
69 | detect_hypervisor_vendor(); | ||
70 | |||
71 | if (!x86_hyper) | ||
72 | return; | ||
73 | |||
54 | init_hypervisor(&boot_cpu_data); | 74 | init_hypervisor(&boot_cpu_data); |
55 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | 75 | |
56 | vmware_platform_setup(); | 76 | if (x86_hyper->init_platform) |
77 | x86_hyper->init_platform(); | ||
57 | } | 78 | } |
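The rework above replaces the VMware-specific checks with a fixed-order table of hypervisor descriptors: the first entry whose detect() callback fires becomes x86_hyper, and its optional set_cpu_features()/init_platform() hooks are invoked from init_hypervisor() and init_hypervisor_platform(). A sketch of what such a descriptor looks like, inferred from the members dereferenced in this hunk; the demo entry and its callbacks are illustrative, not part of the patch:

/*
 * Hedged sketch of a hypervisor descriptor as consumed by
 * detect_hypervisor_vendor() above.  Field names follow the members
 * used in the hunk (name, detect, set_cpu_features, init_platform);
 * the callbacks here are placeholders.
 */
#include <asm/hypervisor.h>
#include <asm/processor.h>

static bool demo_hyper_detect(void)
{
	/* e.g. look for a known signature in the 0x40000000 CPUID range */
	return false;
}

static void demo_hyper_set_cpu_features(struct cpuinfo_x86 *c)
{
	/* set synthetic feature bits for guests of this hypervisor */
}

static void demo_hyper_init_platform(void)
{
	/* register paravirt clocksources, apply quirks, etc. */
}

static const struct hypervisor_x86 x86_hyper_demo = {
	.name			= "Demo Hypervisor",
	.detect			= demo_hyper_detect,
	.set_cpu_features	= demo_hyper_set_cpu_features,
	.init_platform		= demo_hyper_init_platform,
};
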
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 879666f4d871..b4389441efbb 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <asm/processor.h> | 12 | #include <asm/processor.h> |
13 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
15 | #include <asm/ds.h> | ||
16 | #include <asm/bugs.h> | 15 | #include <asm/bugs.h> |
17 | #include <asm/cpu.h> | 16 | #include <asm/cpu.h> |
18 | 17 | ||
@@ -40,6 +39,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
40 | misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; | 39 | misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; |
41 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | 40 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); |
42 | c->cpuid_level = cpuid_eax(0); | 41 | c->cpuid_level = cpuid_eax(0); |
42 | get_cpu_cap(c); | ||
43 | } | 43 | } |
44 | } | 44 | } |
45 | 45 | ||
@@ -47,6 +47,27 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
47 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | 47 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) |
48 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 48 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
49 | 49 | ||
50 | /* | ||
51 | * Atom erratum AAE44/AAF40/AAG38/AAH41: | ||
52 | * | ||
53 | * A race condition between speculative fetches and invalidating | ||
54 | * a large page. This is worked around in microcode, but we | ||
55 | * need the microcode to have already been loaded... so if it is | ||
56 | * not, recommend a BIOS update and disable large pages. | ||
57 | */ | ||
58 | if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2) { | ||
59 | u32 ucode, junk; | ||
60 | |||
61 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
62 | sync_core(); | ||
63 | rdmsr(MSR_IA32_UCODE_REV, junk, ucode); | ||
64 | |||
65 | if (ucode < 0x20e) { | ||
66 | printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n"); | ||
67 | clear_cpu_cap(c, X86_FEATURE_PSE); | ||
68 | } | ||
69 | } | ||
70 | |||
50 | #ifdef CONFIG_X86_64 | 71 | #ifdef CONFIG_X86_64 |
51 | set_cpu_cap(c, X86_FEATURE_SYSENTER32); | 72 | set_cpu_cap(c, X86_FEATURE_SYSENTER32); |
52 | #else | 73 | #else |
@@ -70,7 +91,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
70 | if (c->x86_power & (1 << 8)) { | 91 | if (c->x86_power & (1 << 8)) { |
71 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 92 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
72 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 93 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
73 | sched_clock_stable = 1; | 94 | if (!check_tsc_unstable()) |
95 | sched_clock_stable = 1; | ||
74 | } | 96 | } |
75 | 97 | ||
76 | /* | 98 | /* |
@@ -351,12 +373,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
351 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 373 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
352 | } | 374 | } |
353 | 375 | ||
354 | if (c->cpuid_level > 6) { | ||
355 | unsigned ecx = cpuid_ecx(6); | ||
356 | if (ecx & 0x01) | ||
357 | set_cpu_cap(c, X86_FEATURE_APERFMPERF); | ||
358 | } | ||
359 | |||
360 | if (cpu_has_xmm2) | 376 | if (cpu_has_xmm2) |
361 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | 377 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
362 | if (cpu_has_ds) { | 378 | if (cpu_has_ds) { |
@@ -366,7 +382,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
366 | set_cpu_cap(c, X86_FEATURE_BTS); | 382 | set_cpu_cap(c, X86_FEATURE_BTS); |
367 | if (!(l1 & (1<<12))) | 383 | if (!(l1 & (1<<12))) |
368 | set_cpu_cap(c, X86_FEATURE_PEBS); | 384 | set_cpu_cap(c, X86_FEATURE_PEBS); |
369 | ds_init_intel(c); | ||
370 | } | 385 | } |
371 | 386 | ||
372 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) | 387 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) |
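The Atom PSE erratum check above needs the microcode revision that is already loaded, and it uses the documented read sequence: write zero to IA32_UCODE_REV, execute a serializing instruction, then read the revision back from the MSR's upper half and compare it against 0x20e. The same sequence pulled out as a small helper for clarity (the helper name is ours, not the kernel's):

/*
 * Hedged sketch of the microcode-revision read used by the Atom PSE
 * erratum check: zero IA32_UCODE_REV, serialize with sync_core(), then
 * the loaded revision is reported in the MSR's upper 32 bits.
 */
#include <asm/msr.h>
#include <asm/processor.h>

static u32 intel_ucode_revision(void)
{
	u32 low, rev;

	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
	sync_core();
	rdmsr(MSR_IA32_UCODE_REV, low, rev);	/* revision comes back in EDX */

	return rev;
}
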
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index fc6c8ef92dcc..898c2f4eab88 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
20 | #include <asm/k8.h> | 20 | #include <asm/k8.h> |
21 | #include <asm/smp.h> | ||
21 | 22 | ||
22 | #define LVL_1_INST 1 | 23 | #define LVL_1_INST 1 |
23 | #define LVL_1_DATA 2 | 24 | #define LVL_1_DATA 2 |
@@ -31,6 +32,8 @@ struct _cache_table { | |||
31 | short size; | 32 | short size; |
32 | }; | 33 | }; |
33 | 34 | ||
35 | #define MB(x) ((x) * 1024) | ||
36 | |||
34 | /* All the cache descriptor types we care about (no TLB or | 37 | /* All the cache descriptor types we care about (no TLB or |
35 | trace cache entries) */ | 38 | trace cache entries) */ |
36 | 39 | ||
@@ -44,9 +47,9 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
44 | { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ | 47 | { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ |
45 | { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ | 48 | { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ |
46 | { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | 49 | { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
47 | { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 50 | { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
48 | { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 51 | { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
49 | { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 52 | { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
50 | { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ | 53 | { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ |
51 | { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ | 54 | { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ |
52 | { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | 55 | { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
@@ -59,16 +62,16 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
59 | { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ | 62 | { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ |
60 | { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ | 63 | { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ |
61 | { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ | 64 | { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ |
62 | { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ | 65 | { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */ |
63 | { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ | 66 | { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */ |
64 | { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ | 67 | { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */ |
65 | { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ | 68 | { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */ |
66 | { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ | 69 | { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ |
67 | { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ | 70 | { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */ |
68 | { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ | 71 | { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ |
69 | { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ | 72 | { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */ |
70 | { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ | 73 | { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */ |
71 | { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */ | 74 | { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */ |
72 | { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 75 | { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
73 | { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | 76 | { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
74 | { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | 77 | { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
@@ -77,34 +80,34 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
77 | { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ | 80 | { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ |
78 | { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ | 81 | { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ |
79 | { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ | 82 | { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ |
80 | { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ | 83 | { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */ |
81 | { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 84 | { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
82 | { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 85 | { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
83 | { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 86 | { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
84 | { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 87 | { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
85 | { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ | 88 | { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */ |
86 | { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ | 89 | { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ |
87 | { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ | 90 | { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ |
88 | { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ | 91 | { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ |
89 | { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ | 92 | { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */ |
90 | { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ | 93 | { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */ |
91 | { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ | 94 | { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ |
92 | { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ | 95 | { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */ |
93 | { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ | 96 | { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ |
94 | { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ | 97 | { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */ |
95 | { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ | 98 | { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */ |
96 | { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ | 99 | { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */ |
97 | { 0xd7, LVL_3, 2048 }, /* 8-way set assoc, 64 byte line size */ | 100 | { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */ |
98 | { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ | 101 | { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */ |
99 | { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ | 102 | { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */ |
100 | { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ | 103 | { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */ |
101 | { 0xde, LVL_3, 8192 }, /* 12-way set assoc, 64 byte line size */ | 104 | { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */ |
102 | { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ | 105 | { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */ |
103 | { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ | 106 | { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ |
104 | { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ | 107 | { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ |
105 | { 0xea, LVL_3, 12288 }, /* 24-way set assoc, 64 byte line size */ | 108 | { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */ |
106 | { 0xeb, LVL_3, 18432 }, /* 24-way set assoc, 64 byte line size */ | 109 | { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */ |
107 | { 0xec, LVL_3, 24576 }, /* 24-way set assoc, 64 byte line size */ | 110 | { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */ |
108 | { 0x00, 0, 0} | 111 | { 0x00, 0, 0} |
109 | }; | 112 | }; |
110 | 113 | ||
@@ -145,12 +148,19 @@ union _cpuid4_leaf_ecx { | |||
145 | u32 full; | 148 | u32 full; |
146 | }; | 149 | }; |
147 | 150 | ||
151 | struct amd_l3_cache { | ||
152 | struct pci_dev *dev; | ||
153 | bool can_disable; | ||
154 | unsigned indices; | ||
155 | u8 subcaches[4]; | ||
156 | }; | ||
157 | |||
148 | struct _cpuid4_info { | 158 | struct _cpuid4_info { |
149 | union _cpuid4_leaf_eax eax; | 159 | union _cpuid4_leaf_eax eax; |
150 | union _cpuid4_leaf_ebx ebx; | 160 | union _cpuid4_leaf_ebx ebx; |
151 | union _cpuid4_leaf_ecx ecx; | 161 | union _cpuid4_leaf_ecx ecx; |
152 | unsigned long size; | 162 | unsigned long size; |
153 | unsigned long can_disable; | 163 | struct amd_l3_cache *l3; |
154 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); | 164 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); |
155 | }; | 165 | }; |
156 | 166 | ||
@@ -160,7 +170,7 @@ struct _cpuid4_info_regs { | |||
160 | union _cpuid4_leaf_ebx ebx; | 170 | union _cpuid4_leaf_ebx ebx; |
161 | union _cpuid4_leaf_ecx ecx; | 171 | union _cpuid4_leaf_ecx ecx; |
162 | unsigned long size; | 172 | unsigned long size; |
163 | unsigned long can_disable; | 173 | struct amd_l3_cache *l3; |
164 | }; | 174 | }; |
165 | 175 | ||
166 | unsigned short num_cache_leaves; | 176 | unsigned short num_cache_leaves; |
@@ -290,22 +300,269 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
290 | (ebx->split.ways_of_associativity + 1) - 1; | 300 | (ebx->split.ways_of_associativity + 1) - 1; |
291 | } | 301 | } |
292 | 302 | ||
293 | static void __cpuinit | 303 | struct _cache_attr { |
294 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | 304 | struct attribute attr; |
305 | ssize_t (*show)(struct _cpuid4_info *, char *); | ||
306 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); | ||
307 | }; | ||
308 | |||
309 | #ifdef CONFIG_CPU_SUP_AMD | ||
310 | |||
311 | /* | ||
312 | * L3 cache descriptors | ||
313 | */ | ||
314 | static struct amd_l3_cache **__cpuinitdata l3_caches; | ||
315 | |||
316 | static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) | ||
317 | { | ||
318 | unsigned int sc0, sc1, sc2, sc3; | ||
319 | u32 val = 0; | ||
320 | |||
321 | pci_read_config_dword(l3->dev, 0x1C4, &val); | ||
322 | |||
323 | /* calculate subcache sizes */ | ||
324 | l3->subcaches[0] = sc0 = !(val & BIT(0)); | ||
325 | l3->subcaches[1] = sc1 = !(val & BIT(4)); | ||
326 | l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); | ||
327 | l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); | ||
328 | |||
329 | l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; | ||
330 | } | ||
331 | |||
332 | static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) | ||
333 | { | ||
334 | struct amd_l3_cache *l3; | ||
335 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
336 | |||
337 | l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC); | ||
338 | if (!l3) { | ||
339 | printk(KERN_WARNING "Error allocating L3 struct\n"); | ||
340 | return NULL; | ||
341 | } | ||
342 | |||
343 | l3->dev = dev; | ||
344 | |||
345 | amd_calc_l3_indices(l3); | ||
346 | |||
347 | return l3; | ||
348 | } | ||
349 | |||
350 | static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, | ||
351 | int index) | ||
295 | { | 352 | { |
353 | int node; | ||
354 | |||
355 | if (boot_cpu_data.x86 != 0x10) | ||
356 | return; | ||
357 | |||
296 | if (index < 3) | 358 | if (index < 3) |
297 | return; | 359 | return; |
298 | 360 | ||
299 | if (boot_cpu_data.x86 == 0x11) | 361 | /* see errata #382 and #388 */ |
362 | if (boot_cpu_data.x86_model < 0x8) | ||
300 | return; | 363 | return; |
301 | 364 | ||
302 | /* see erratum #382 */ | 365 | if ((boot_cpu_data.x86_model == 0x8 || |
303 | if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8)) | 366 | boot_cpu_data.x86_model == 0x9) |
367 | && | ||
368 | boot_cpu_data.x86_mask < 0x1) | ||
369 | return; | ||
370 | |||
371 | /* not in virtualized environments */ | ||
372 | if (num_k8_northbridges == 0) | ||
304 | return; | 373 | return; |
305 | 374 | ||
306 | this_leaf->can_disable = 1; | 375 | /* |
376 | * Strictly speaking, the amount in @size below is leaked since it is | ||
377 | * never freed but this is done only on shutdown so it doesn't matter. | ||
378 | */ | ||
379 | if (!l3_caches) { | ||
380 | int size = num_k8_northbridges * sizeof(struct amd_l3_cache *); | ||
381 | |||
382 | l3_caches = kzalloc(size, GFP_ATOMIC); | ||
383 | if (!l3_caches) | ||
384 | return; | ||
385 | } | ||
386 | |||
387 | node = amd_get_nb_id(smp_processor_id()); | ||
388 | |||
389 | if (!l3_caches[node]) { | ||
390 | l3_caches[node] = amd_init_l3_cache(node); | ||
391 | l3_caches[node]->can_disable = true; | ||
392 | } | ||
393 | |||
394 | WARN_ON(!l3_caches[node]); | ||
395 | |||
396 | this_leaf->l3 = l3_caches[node]; | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * check whether a slot used for disabling an L3 index is occupied. | ||
401 | * @l3: L3 cache descriptor | ||
402 | * @slot: slot number (0..1) | ||
403 | * | ||
404 | * @returns: the disabled index if used or negative value if slot free. | ||
405 | */ | ||
406 | int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot) | ||
407 | { | ||
408 | unsigned int reg = 0; | ||
409 | |||
410 | pci_read_config_dword(l3->dev, 0x1BC + slot * 4, ®); | ||
411 | |||
412 | /* check whether this slot is activated already */ | ||
413 | if (reg & (3UL << 30)) | ||
414 | return reg & 0xfff; | ||
415 | |||
416 | return -1; | ||
417 | } | ||
418 | |||
419 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | ||
420 | unsigned int slot) | ||
421 | { | ||
422 | int index; | ||
423 | |||
424 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) | ||
425 | return -EINVAL; | ||
426 | |||
427 | index = amd_get_l3_disable_slot(this_leaf->l3, slot); | ||
428 | if (index >= 0) | ||
429 | return sprintf(buf, "%d\n", index); | ||
430 | |||
431 | return sprintf(buf, "FREE\n"); | ||
432 | } | ||
433 | |||
434 | #define SHOW_CACHE_DISABLE(slot) \ | ||
435 | static ssize_t \ | ||
436 | show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \ | ||
437 | { \ | ||
438 | return show_cache_disable(this_leaf, buf, slot); \ | ||
439 | } | ||
440 | SHOW_CACHE_DISABLE(0) | ||
441 | SHOW_CACHE_DISABLE(1) | ||
442 | |||
443 | static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | ||
444 | unsigned slot, unsigned long idx) | ||
445 | { | ||
446 | int i; | ||
447 | |||
448 | idx |= BIT(30); | ||
449 | |||
450 | /* | ||
451 | * disable index in all 4 subcaches | ||
452 | */ | ||
453 | for (i = 0; i < 4; i++) { | ||
454 | u32 reg = idx | (i << 20); | ||
455 | |||
456 | if (!l3->subcaches[i]) | ||
457 | continue; | ||
458 | |||
459 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | ||
460 | |||
461 | /* | ||
462 | * We need to WBINVD on a core on the node containing the L3 | ||
463 | * cache which indices we disable therefore a simple wbinvd() | ||
464 | * is not sufficient. | ||
465 | */ | ||
466 | wbinvd_on_cpu(cpu); | ||
467 | |||
468 | reg |= BIT(31); | ||
469 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | ||
470 | } | ||
307 | } | 471 | } |
308 | 472 | ||
473 | /* | ||
474 | * disable a L3 cache index by using a disable-slot | ||
475 | * | ||
476 | * @l3: L3 cache descriptor | ||
477 | * @cpu: A CPU on the node containing the L3 cache | ||
478 | * @slot: slot number (0..1) | ||
479 | * @index: index to disable | ||
480 | * | ||
481 | * @return: 0 on success, error status on failure | ||
482 | */ | ||
483 | int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot, | ||
484 | unsigned long index) | ||
485 | { | ||
486 | int ret = 0; | ||
487 | |||
488 | #define SUBCACHE_MASK (3UL << 20) | ||
489 | #define SUBCACHE_INDEX 0xfff | ||
490 | |||
491 | /* | ||
492 | * check whether this slot is already used or | ||
493 | * the index is already disabled | ||
494 | */ | ||
495 | ret = amd_get_l3_disable_slot(l3, slot); | ||
496 | if (ret >= 0) | ||
497 | return -EINVAL; | ||
498 | |||
499 | /* | ||
500 | * check whether the other slot has disabled the | ||
501 | * same index already | ||
502 | */ | ||
503 | if (index == amd_get_l3_disable_slot(l3, !slot)) | ||
504 | return -EINVAL; | ||
505 | |||
506 | /* do not allow writes outside of allowed bits */ | ||
507 | if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | ||
508 | ((index & SUBCACHE_INDEX) > l3->indices)) | ||
509 | return -EINVAL; | ||
510 | |||
511 | amd_l3_disable_index(l3, cpu, slot, index); | ||
512 | |||
513 | return 0; | ||
514 | } | ||
515 | |||
516 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | ||
517 | const char *buf, size_t count, | ||
518 | unsigned int slot) | ||
519 | { | ||
520 | unsigned long val = 0; | ||
521 | int cpu, err = 0; | ||
522 | |||
523 | if (!capable(CAP_SYS_ADMIN)) | ||
524 | return -EPERM; | ||
525 | |||
526 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) | ||
527 | return -EINVAL; | ||
528 | |||
529 | cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
530 | |||
531 | if (strict_strtoul(buf, 10, &val) < 0) | ||
532 | return -EINVAL; | ||
533 | |||
534 | err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val); | ||
535 | if (err) { | ||
536 | if (err == -EEXIST) | ||
537 | printk(KERN_WARNING "L3 disable slot %d in use!\n", | ||
538 | slot); | ||
539 | return err; | ||
540 | } | ||
541 | return count; | ||
542 | } | ||
543 | |||
544 | #define STORE_CACHE_DISABLE(slot) \ | ||
545 | static ssize_t \ | ||
546 | store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ | ||
547 | const char *buf, size_t count) \ | ||
548 | { \ | ||
549 | return store_cache_disable(this_leaf, buf, count, slot); \ | ||
550 | } | ||
551 | STORE_CACHE_DISABLE(0) | ||
552 | STORE_CACHE_DISABLE(1) | ||
553 | |||
554 | static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, | ||
555 | show_cache_disable_0, store_cache_disable_0); | ||
556 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | ||
557 | show_cache_disable_1, store_cache_disable_1); | ||
558 | |||
559 | #else /* CONFIG_CPU_SUP_AMD */ | ||
560 | static void __cpuinit | ||
561 | amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) | ||
562 | { | ||
563 | }; | ||
564 | #endif /* CONFIG_CPU_SUP_AMD */ | ||
565 | |||
309 | static int | 566 | static int |
310 | __cpuinit cpuid4_cache_lookup_regs(int index, | 567 | __cpuinit cpuid4_cache_lookup_regs(int index, |
311 | struct _cpuid4_info_regs *this_leaf) | 568 | struct _cpuid4_info_regs *this_leaf) |
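The L3 index-disable support added in the hunk above programs one of two slot registers in the northbridge's PCI config space at offset 0x1BC + 4 * slot: bits 31:30 flag the slot as occupied, bits 21:20 carry the subcache selector, and bits 11:0 hold the cache index being disabled, which is why amd_set_l3_disable_slot() rejects values outside SUBCACHE_MASK | SUBCACHE_INDEX. A small decode sketch of that register layout; the struct and helper names are illustrative only:

/*
 * Hedged sketch of the slot register layout used by the L3
 * index-disable code above (PCI config offset 0x1BC + 4 * slot on the
 * K8 northbridge misc device).
 */
#include <linux/types.h>

struct l3_slot {
	bool	occupied;	/* bits 31:30 */
	u8	subcache;	/* bits 21:20 */
	u16	index;		/* bits 11:0  */
};

static struct l3_slot decode_l3_slot(u32 reg)
{
	struct l3_slot slot = {
		.occupied	= !!(reg & (3UL << 30)),
		.subcache	= (reg >> 20) & 0x3,
		.index		= reg & 0xfff,
	};

	return slot;
}

Userspace reaches this mechanism through the cache_disable_0/cache_disable_1 attributes defined here: writing a decimal index disables that index (CAP_SYS_ADMIN is required), and reading reports either the disabled index or "FREE".
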
@@ -317,8 +574,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index, | |||
317 | 574 | ||
318 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | 575 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { |
319 | amd_cpuid4(index, &eax, &ebx, &ecx); | 576 | amd_cpuid4(index, &eax, &ebx, &ecx); |
320 | if (boot_cpu_data.x86 >= 0x10) | 577 | amd_check_l3_disable(this_leaf, index); |
321 | amd_check_l3_disable(index, this_leaf); | ||
322 | } else { | 578 | } else { |
323 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | 579 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); |
324 | } | 580 | } |
@@ -575,6 +831,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) | |||
575 | for (i = 0; i < num_cache_leaves; i++) | 831 | for (i = 0; i < num_cache_leaves; i++) |
576 | cache_remove_shared_cpu_map(cpu, i); | 832 | cache_remove_shared_cpu_map(cpu, i); |
577 | 833 | ||
834 | kfree(per_cpu(ici_cpuid4_info, cpu)->l3); | ||
578 | kfree(per_cpu(ici_cpuid4_info, cpu)); | 835 | kfree(per_cpu(ici_cpuid4_info, cpu)); |
579 | per_cpu(ici_cpuid4_info, cpu) = NULL; | 836 | per_cpu(ici_cpuid4_info, cpu) = NULL; |
580 | } | 837 | } |
@@ -711,82 +968,6 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) | |||
711 | #define to_object(k) container_of(k, struct _index_kobject, kobj) | 968 | #define to_object(k) container_of(k, struct _index_kobject, kobj) |
712 | #define to_attr(a) container_of(a, struct _cache_attr, attr) | 969 | #define to_attr(a) container_of(a, struct _cache_attr, attr) |
713 | 970 | ||
714 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | ||
715 | unsigned int index) | ||
716 | { | ||
717 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
718 | int node = cpu_to_node(cpu); | ||
719 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
720 | unsigned int reg = 0; | ||
721 | |||
722 | if (!this_leaf->can_disable) | ||
723 | return -EINVAL; | ||
724 | |||
725 | if (!dev) | ||
726 | return -EINVAL; | ||
727 | |||
728 | pci_read_config_dword(dev, 0x1BC + index * 4, ®); | ||
729 | return sprintf(buf, "%x\n", reg); | ||
730 | } | ||
731 | |||
732 | #define SHOW_CACHE_DISABLE(index) \ | ||
733 | static ssize_t \ | ||
734 | show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ | ||
735 | { \ | ||
736 | return show_cache_disable(this_leaf, buf, index); \ | ||
737 | } | ||
738 | SHOW_CACHE_DISABLE(0) | ||
739 | SHOW_CACHE_DISABLE(1) | ||
740 | |||
741 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | ||
742 | const char *buf, size_t count, unsigned int index) | ||
743 | { | ||
744 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
745 | int node = cpu_to_node(cpu); | ||
746 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
747 | unsigned long val = 0; | ||
748 | unsigned int scrubber = 0; | ||
749 | |||
750 | if (!this_leaf->can_disable) | ||
751 | return -EINVAL; | ||
752 | |||
753 | if (!capable(CAP_SYS_ADMIN)) | ||
754 | return -EPERM; | ||
755 | |||
756 | if (!dev) | ||
757 | return -EINVAL; | ||
758 | |||
759 | if (strict_strtoul(buf, 10, &val) < 0) | ||
760 | return -EINVAL; | ||
761 | |||
762 | val |= 0xc0000000; | ||
763 | |||
764 | pci_read_config_dword(dev, 0x58, &scrubber); | ||
765 | scrubber &= ~0x1f000000; | ||
766 | pci_write_config_dword(dev, 0x58, scrubber); | ||
767 | |||
768 | pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); | ||
769 | wbinvd(); | ||
770 | pci_write_config_dword(dev, 0x1BC + index * 4, val); | ||
771 | return count; | ||
772 | } | ||
773 | |||
774 | #define STORE_CACHE_DISABLE(index) \ | ||
775 | static ssize_t \ | ||
776 | store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ | ||
777 | const char *buf, size_t count) \ | ||
778 | { \ | ||
779 | return store_cache_disable(this_leaf, buf, count, index); \ | ||
780 | } | ||
781 | STORE_CACHE_DISABLE(0) | ||
782 | STORE_CACHE_DISABLE(1) | ||
783 | |||
784 | struct _cache_attr { | ||
785 | struct attribute attr; | ||
786 | ssize_t (*show)(struct _cpuid4_info *, char *); | ||
787 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); | ||
788 | }; | ||
789 | |||
790 | #define define_one_ro(_name) \ | 971 | #define define_one_ro(_name) \ |
791 | static struct _cache_attr _name = \ | 972 | static struct _cache_attr _name = \ |
792 | __ATTR(_name, 0444, show_##_name, NULL) | 973 | __ATTR(_name, 0444, show_##_name, NULL) |
@@ -801,23 +982,28 @@ define_one_ro(size); | |||
801 | define_one_ro(shared_cpu_map); | 982 | define_one_ro(shared_cpu_map); |
802 | define_one_ro(shared_cpu_list); | 983 | define_one_ro(shared_cpu_list); |
803 | 984 | ||
804 | static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, | 985 | #define DEFAULT_SYSFS_CACHE_ATTRS \ |
805 | show_cache_disable_0, store_cache_disable_0); | 986 | &type.attr, \ |
806 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | 987 | &level.attr, \ |
807 | show_cache_disable_1, store_cache_disable_1); | 988 | &coherency_line_size.attr, \ |
989 | &physical_line_partition.attr, \ | ||
990 | &ways_of_associativity.attr, \ | ||
991 | &number_of_sets.attr, \ | ||
992 | &size.attr, \ | ||
993 | &shared_cpu_map.attr, \ | ||
994 | &shared_cpu_list.attr | ||
808 | 995 | ||
809 | static struct attribute *default_attrs[] = { | 996 | static struct attribute *default_attrs[] = { |
810 | &type.attr, | 997 | DEFAULT_SYSFS_CACHE_ATTRS, |
811 | &level.attr, | 998 | NULL |
812 | &coherency_line_size.attr, | 999 | }; |
813 | &physical_line_partition.attr, | 1000 | |
814 | &ways_of_associativity.attr, | 1001 | static struct attribute *default_l3_attrs[] = { |
815 | &number_of_sets.attr, | 1002 | DEFAULT_SYSFS_CACHE_ATTRS, |
816 | &size.attr, | 1003 | #ifdef CONFIG_CPU_SUP_AMD |
817 | &shared_cpu_map.attr, | ||
818 | &shared_cpu_list.attr, | ||
819 | &cache_disable_0.attr, | 1004 | &cache_disable_0.attr, |
820 | &cache_disable_1.attr, | 1005 | &cache_disable_1.attr, |
1006 | #endif | ||
821 | NULL | 1007 | NULL |
822 | }; | 1008 | }; |
823 | 1009 | ||
@@ -848,7 +1034,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, | |||
848 | return ret; | 1034 | return ret; |
849 | } | 1035 | } |
850 | 1036 | ||
851 | static struct sysfs_ops sysfs_ops = { | 1037 | static const struct sysfs_ops sysfs_ops = { |
852 | .show = show, | 1038 | .show = show, |
853 | .store = store, | 1039 | .store = store, |
854 | }; | 1040 | }; |
@@ -908,6 +1094,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
908 | unsigned int cpu = sys_dev->id; | 1094 | unsigned int cpu = sys_dev->id; |
909 | unsigned long i, j; | 1095 | unsigned long i, j; |
910 | struct _index_kobject *this_object; | 1096 | struct _index_kobject *this_object; |
1097 | struct _cpuid4_info *this_leaf; | ||
911 | int retval; | 1098 | int retval; |
912 | 1099 | ||
913 | retval = cpuid4_cache_sysfs_init(cpu); | 1100 | retval = cpuid4_cache_sysfs_init(cpu); |
@@ -926,6 +1113,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
926 | this_object = INDEX_KOBJECT_PTR(cpu, i); | 1113 | this_object = INDEX_KOBJECT_PTR(cpu, i); |
927 | this_object->cpu = cpu; | 1114 | this_object->cpu = cpu; |
928 | this_object->index = i; | 1115 | this_object->index = i; |
1116 | |||
1117 | this_leaf = CPUID4_INFO_IDX(cpu, i); | ||
1118 | |||
1119 | if (this_leaf->l3 && this_leaf->l3->can_disable) | ||
1120 | ktype_cache.default_attrs = default_l3_attrs; | ||
1121 | else | ||
1122 | ktype_cache.default_attrs = default_attrs; | ||
1123 | |||
929 | retval = kobject_init_and_add(&(this_object->kobj), | 1124 | retval = kobject_init_and_add(&(this_object->kobj), |
930 | &ktype_cache, | 1125 | &ktype_cache, |
931 | per_cpu(ici_cache_kobject, cpu), | 1126 | per_cpu(ici_cache_kobject, cpu), |
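The hunks above expose cache_disable_0/cache_disable_1 through sysfs only for leaves whose L3 supports index disabling. As a rough illustration of how these files are consumed, here is a hedged user-space sketch; the cpu0/index3 path and the assumption that index3 is the L3 leaf are mine, not something this patch guarantees.

/* Hedged sketch: probe an L3 cache-disable slot through sysfs.
 * The cpu0/index3 path is an assumption about where the L3 leaf lands;
 * the files only exist with CONFIG_CPU_SUP_AMD and a capable L3. */
#include <stdio.h>

int main(void)
{
        const char *path =
                "/sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0";
        FILE *f = fopen(path, "r");
        unsigned int reg;

        if (!f) {
                perror("cache_disable_0");   /* attribute not present on this system */
                return 1;
        }
        if (fscanf(f, "%x", &reg) == 1)      /* show_cache_disable() prints hex */
                printf("slot 0 register: %#x\n", reg);
        fclose(f);

        /* Writing a decimal cache index to the same file (as root) asks the
         * kernel to disable that index; -EEXIST means the slot is in use. */
        return 0;
}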
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 4ac6d48fe11b..bb34b03af252 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | |||
7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o | 7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o |
8 | 8 | ||
9 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o | 9 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o |
10 | |||
11 | obj-$(CONFIG_ACPI_APEI) += mce-apei.o | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c new file mode 100644 index 000000000000..8209472b27a5 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
@@ -0,0 +1,138 @@ | |||
1 | /* | ||
2 | * Bridge between MCE and APEI | ||
3 | * | ||
4 | * On some machines, corrected memory errors are reported via an APEI | ||
5 | * generic hardware error source (GHES) instead of a corrected Machine | ||
6 | * Check. Such corrected memory errors can still be reported to user | ||
7 | * space through /dev/mcelog by faking a corrected Machine Check, so | ||
8 | * that the affected memory page can be offlined by /sbin/mcelog once | ||
9 | * the error count for that page exceeds the threshold. | ||
10 | * | ||
11 | * For a fatal MCE, the record is saved into persistent storage via | ||
12 | * ERST, so that it can still be logged after reboot. | ||
13 | * | ||
14 | * Copyright 2010 Intel Corp. | ||
15 | * Author: Huang Ying <ying.huang@intel.com> | ||
16 | * | ||
17 | * This program is free software; you can redistribute it and/or | ||
18 | * modify it under the terms of the GNU General Public License version | ||
19 | * 2 as published by the Free Software Foundation. | ||
20 | * | ||
21 | * This program is distributed in the hope that it will be useful, | ||
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
24 | * GNU General Public License for more details. | ||
25 | * | ||
26 | * You should have received a copy of the GNU General Public License | ||
27 | * along with this program; if not, write to the Free Software | ||
28 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
29 | */ | ||
30 | |||
31 | #include <linux/kernel.h> | ||
32 | #include <linux/acpi.h> | ||
33 | #include <linux/cper.h> | ||
34 | #include <acpi/apei.h> | ||
35 | #include <asm/mce.h> | ||
36 | |||
37 | #include "mce-internal.h" | ||
38 | |||
39 | void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) | ||
40 | { | ||
41 | struct mce m; | ||
42 | |||
43 | /* Only corrected MC is reported */ | ||
44 | if (!corrected) | ||
45 | return; | ||
46 | |||
47 | mce_setup(&m); | ||
48 | m.bank = 1; | ||
49 | /* Fake a memory read corrected error with unknown channel */ | ||
50 | m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; | ||
51 | m.addr = mem_err->physical_addr; | ||
52 | mce_log(&m); | ||
53 | mce_notify_irq(); | ||
54 | } | ||
55 | EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); | ||
56 | |||
57 | #define CPER_CREATOR_MCE \ | ||
58 | UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ | ||
59 | 0x64, 0x90, 0xb8, 0x9d) | ||
60 | #define CPER_SECTION_TYPE_MCE \ | ||
61 | UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ | ||
62 | 0x04, 0x4a, 0x38, 0xfc) | ||
63 | |||
64 | /* | ||
65 | * The CPER specification (UEFI specification 2.3, Appendix N) requires | ||
66 | * the record to be byte-packed. | ||
67 | */ | ||
68 | struct cper_mce_record { | ||
69 | struct cper_record_header hdr; | ||
70 | struct cper_section_descriptor sec_hdr; | ||
71 | struct mce mce; | ||
72 | } __packed; | ||
73 | |||
74 | int apei_write_mce(struct mce *m) | ||
75 | { | ||
76 | struct cper_mce_record rcd; | ||
77 | |||
78 | memset(&rcd, 0, sizeof(rcd)); | ||
79 | memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); | ||
80 | rcd.hdr.revision = CPER_RECORD_REV; | ||
81 | rcd.hdr.signature_end = CPER_SIG_END; | ||
82 | rcd.hdr.section_count = 1; | ||
83 | rcd.hdr.error_severity = CPER_SEV_FATAL; | ||
84 | /* timestamp, platform_id, partition_id are all invalid */ | ||
85 | rcd.hdr.validation_bits = 0; | ||
86 | rcd.hdr.record_length = sizeof(rcd); | ||
87 | rcd.hdr.creator_id = CPER_CREATOR_MCE; | ||
88 | rcd.hdr.notification_type = CPER_NOTIFY_MCE; | ||
89 | rcd.hdr.record_id = cper_next_record_id(); | ||
90 | rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR; | ||
91 | |||
92 | rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd; | ||
93 | rcd.sec_hdr.section_length = sizeof(rcd.mce); | ||
94 | rcd.sec_hdr.revision = CPER_SEC_REV; | ||
95 | /* fru_id and fru_text are invalid */ | ||
96 | rcd.sec_hdr.validation_bits = 0; | ||
97 | rcd.sec_hdr.flags = CPER_SEC_PRIMARY; | ||
98 | rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE; | ||
99 | rcd.sec_hdr.section_severity = CPER_SEV_FATAL; | ||
100 | |||
101 | memcpy(&rcd.mce, m, sizeof(*m)); | ||
102 | |||
103 | return erst_write(&rcd.hdr); | ||
104 | } | ||
105 | |||
106 | ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
107 | { | ||
108 | struct cper_mce_record rcd; | ||
109 | ssize_t len; | ||
110 | |||
111 | len = erst_read_next(&rcd.hdr, sizeof(rcd)); | ||
112 | if (len <= 0) | ||
113 | return len; | ||
114 | /* Cannot skip other records in ERST storage without clearing them */ | ||
115 | else if (len != sizeof(rcd) || | ||
116 | uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { | ||
117 | if (printk_ratelimit()) | ||
118 | pr_warning( | ||
119 | "MCE-APEI: Can not skip the unknown record in ERST"); | ||
120 | return -EIO; | ||
121 | } | ||
122 | |||
123 | memcpy(m, &rcd.mce, sizeof(*m)); | ||
124 | *record_id = rcd.hdr.record_id; | ||
125 | |||
126 | return sizeof(*m); | ||
127 | } | ||
128 | |||
129 | /* Check whether there are records in ERST */ | ||
130 | int apei_check_mce(void) | ||
131 | { | ||
132 | return erst_get_record_count(); | ||
133 | } | ||
134 | |||
135 | int apei_clear_mce(u64 record_id) | ||
136 | { | ||
137 | return erst_clear(record_id); | ||
138 | } | ||
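One detail worth noting in apei_write_mce() above is that section_offset is computed with raw pointer subtraction. The standalone sketch below shows the same layout arithmetic with offsetof; the structure names are stand-ins of mine, since the real CPER types live in kernel headers.

/* Hedged sketch: offset of an embedded record section, analogous to
 * rcd.sec_hdr.section_offset above. Struct names and fields are stand-ins. */
#include <stddef.h>
#include <stdio.h>

struct hdr     { unsigned char sig[4]; unsigned int len; };
struct sec     { unsigned int offset, length; };
struct payload { unsigned long long status, addr, misc; };

struct record {
        struct hdr hdr;
        struct sec sec_hdr;
        struct payload mce;
} __attribute__((packed));              /* CPER requires byte packing */

int main(void)
{
        /* Equivalent to (void *)&rcd.mce - (void *)&rcd in the kernel code */
        printf("section_offset = %zu\n", offsetof(struct record, mce));
        printf("section_length = %zu\n", sizeof(struct payload));
        return 0;
}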
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 73734baa50f2..e7dbde7bfedb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/kdebug.h> | 22 | #include <linux/kdebug.h> |
23 | #include <linux/cpu.h> | 23 | #include <linux/cpu.h> |
24 | #include <linux/sched.h> | 24 | #include <linux/sched.h> |
25 | #include <linux/gfp.h> | ||
25 | #include <asm/mce.h> | 26 | #include <asm/mce.h> |
26 | #include <asm/apic.h> | 27 | #include <asm/apic.h> |
27 | 28 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 32996f9fab67..fefcc69ee8b5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -28,3 +28,26 @@ extern int mce_ser; | |||
28 | 28 | ||
29 | extern struct mce_bank *mce_banks; | 29 | extern struct mce_bank *mce_banks; |
30 | 30 | ||
31 | #ifdef CONFIG_ACPI_APEI | ||
32 | int apei_write_mce(struct mce *m); | ||
33 | ssize_t apei_read_mce(struct mce *m, u64 *record_id); | ||
34 | int apei_check_mce(void); | ||
35 | int apei_clear_mce(u64 record_id); | ||
36 | #else | ||
37 | static inline int apei_write_mce(struct mce *m) | ||
38 | { | ||
39 | return -EINVAL; | ||
40 | } | ||
41 | static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
42 | { | ||
43 | return 0; | ||
44 | } | ||
45 | static inline int apei_check_mce(void) | ||
46 | { | ||
47 | return 0; | ||
48 | } | ||
49 | static inline int apei_clear_mce(u64 record_id) | ||
50 | { | ||
51 | return -EINVAL; | ||
52 | } | ||
53 | #endif | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a8aacd4b513c..ed41562909fe 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
27 | #include <linux/sysfs.h> | 27 | #include <linux/sysfs.h> |
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/slab.h> | ||
29 | #include <linux/init.h> | 30 | #include <linux/init.h> |
30 | #include <linux/kmod.h> | 31 | #include <linux/kmod.h> |
31 | #include <linux/poll.h> | 32 | #include <linux/poll.h> |
@@ -35,6 +36,7 @@ | |||
35 | #include <linux/fs.h> | 36 | #include <linux/fs.h> |
36 | #include <linux/mm.h> | 37 | #include <linux/mm.h> |
37 | #include <linux/debugfs.h> | 38 | #include <linux/debugfs.h> |
39 | #include <linux/edac_mce.h> | ||
38 | 40 | ||
39 | #include <asm/processor.h> | 41 | #include <asm/processor.h> |
40 | #include <asm/hw_irq.h> | 42 | #include <asm/hw_irq.h> |
@@ -46,6 +48,13 @@ | |||
46 | 48 | ||
47 | #include "mce-internal.h" | 49 | #include "mce-internal.h" |
48 | 50 | ||
51 | static DEFINE_MUTEX(mce_read_mutex); | ||
52 | |||
53 | #define rcu_dereference_check_mce(p) \ | ||
54 | rcu_dereference_index_check((p), \ | ||
55 | rcu_read_lock_sched_held() || \ | ||
56 | lockdep_is_held(&mce_read_mutex)) | ||
57 | |||
49 | #define CREATE_TRACE_POINTS | 58 | #define CREATE_TRACE_POINTS |
50 | #include <trace/events/mce.h> | 59 | #include <trace/events/mce.h> |
51 | 60 | ||
@@ -98,8 +107,8 @@ EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | |||
98 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | 107 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, |
99 | void *data) | 108 | void *data) |
100 | { | 109 | { |
101 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | 110 | pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n"); |
102 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | 111 | pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n"); |
103 | 112 | ||
104 | return NOTIFY_STOP; | 113 | return NOTIFY_STOP; |
105 | } | 114 | } |
@@ -158,9 +167,18 @@ void mce_log(struct mce *mce) | |||
158 | mce->finished = 0; | 167 | mce->finished = 0; |
159 | wmb(); | 168 | wmb(); |
160 | for (;;) { | 169 | for (;;) { |
161 | entry = rcu_dereference(mcelog.next); | 170 | entry = rcu_dereference_check_mce(mcelog.next); |
162 | for (;;) { | 171 | for (;;) { |
163 | /* | 172 | /* |
173 | * If edac_mce is enabled, it will check the error type | ||
174 | * and process it if it is a known error. | ||
175 | * Otherwise, the error will be sent through the mcelog | ||
176 | * interface. | ||
177 | */ | ||
178 | if (edac_mce_parse(mce)) | ||
179 | return; | ||
180 | |||
181 | /* | ||
164 | * When the buffer fills up discard new entries. | 182 | * When the buffer fills up discard new entries. |
165 | * Assume that the earlier errors are the more | 183 | * Assume that the earlier errors are the more |
166 | * interesting ones: | 184 | * interesting ones: |
@@ -193,11 +211,11 @@ void mce_log(struct mce *mce) | |||
193 | 211 | ||
194 | static void print_mce(struct mce *m) | 212 | static void print_mce(struct mce *m) |
195 | { | 213 | { |
196 | pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | 214 | pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", |
197 | m->extcpu, m->mcgstatus, m->bank, m->status); | 215 | m->extcpu, m->mcgstatus, m->bank, m->status); |
198 | 216 | ||
199 | if (m->ip) { | 217 | if (m->ip) { |
200 | pr_emerg("RIP%s %02x:<%016Lx> ", | 218 | pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", |
201 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | 219 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", |
202 | m->cs, m->ip); | 220 | m->cs, m->ip); |
203 | 221 | ||
@@ -206,14 +224,14 @@ static void print_mce(struct mce *m) | |||
206 | pr_cont("\n"); | 224 | pr_cont("\n"); |
207 | } | 225 | } |
208 | 226 | ||
209 | pr_emerg("TSC %llx ", m->tsc); | 227 | pr_emerg(HW_ERR "TSC %llx ", m->tsc); |
210 | if (m->addr) | 228 | if (m->addr) |
211 | pr_cont("ADDR %llx ", m->addr); | 229 | pr_cont("ADDR %llx ", m->addr); |
212 | if (m->misc) | 230 | if (m->misc) |
213 | pr_cont("MISC %llx ", m->misc); | 231 | pr_cont("MISC %llx ", m->misc); |
214 | 232 | ||
215 | pr_cont("\n"); | 233 | pr_cont("\n"); |
216 | pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | 234 | pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", |
217 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); | 235 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); |
218 | 236 | ||
219 | /* | 237 | /* |
@@ -223,16 +241,6 @@ static void print_mce(struct mce *m) | |||
223 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | 241 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); |
224 | } | 242 | } |
225 | 243 | ||
226 | static void print_mce_head(void) | ||
227 | { | ||
228 | pr_emerg("\nHARDWARE ERROR\n"); | ||
229 | } | ||
230 | |||
231 | static void print_mce_tail(void) | ||
232 | { | ||
233 | pr_emerg("This is not a software problem!\n"); | ||
234 | } | ||
235 | |||
236 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | 244 | #define PANIC_TIMEOUT 5 /* 5 seconds */ |
237 | 245 | ||
238 | static atomic_t mce_paniced; | 246 | static atomic_t mce_paniced; |
@@ -256,7 +264,7 @@ static void wait_for_panic(void) | |||
256 | 264 | ||
257 | static void mce_panic(char *msg, struct mce *final, char *exp) | 265 | static void mce_panic(char *msg, struct mce *final, char *exp) |
258 | { | 266 | { |
259 | int i; | 267 | int i, apei_err = 0; |
260 | 268 | ||
261 | if (!fake_panic) { | 269 | if (!fake_panic) { |
262 | /* | 270 | /* |
@@ -273,14 +281,16 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
273 | if (atomic_inc_return(&mce_fake_paniced) > 1) | 281 | if (atomic_inc_return(&mce_fake_paniced) > 1) |
274 | return; | 282 | return; |
275 | } | 283 | } |
276 | print_mce_head(); | ||
277 | /* First print corrected ones that are still unlogged */ | 284 | /* First print corrected ones that are still unlogged */ |
278 | for (i = 0; i < MCE_LOG_LEN; i++) { | 285 | for (i = 0; i < MCE_LOG_LEN; i++) { |
279 | struct mce *m = &mcelog.entry[i]; | 286 | struct mce *m = &mcelog.entry[i]; |
280 | if (!(m->status & MCI_STATUS_VAL)) | 287 | if (!(m->status & MCI_STATUS_VAL)) |
281 | continue; | 288 | continue; |
282 | if (!(m->status & MCI_STATUS_UC)) | 289 | if (!(m->status & MCI_STATUS_UC)) { |
283 | print_mce(m); | 290 | print_mce(m); |
291 | if (!apei_err) | ||
292 | apei_err = apei_write_mce(m); | ||
293 | } | ||
284 | } | 294 | } |
285 | /* Now print uncorrected but with the final one last */ | 295 | /* Now print uncorrected but with the final one last */ |
286 | for (i = 0; i < MCE_LOG_LEN; i++) { | 296 | for (i = 0; i < MCE_LOG_LEN; i++) { |
@@ -289,22 +299,27 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
289 | continue; | 299 | continue; |
290 | if (!(m->status & MCI_STATUS_UC)) | 300 | if (!(m->status & MCI_STATUS_UC)) |
291 | continue; | 301 | continue; |
292 | if (!final || memcmp(m, final, sizeof(struct mce))) | 302 | if (!final || memcmp(m, final, sizeof(struct mce))) { |
293 | print_mce(m); | 303 | print_mce(m); |
304 | if (!apei_err) | ||
305 | apei_err = apei_write_mce(m); | ||
306 | } | ||
294 | } | 307 | } |
295 | if (final) | 308 | if (final) { |
296 | print_mce(final); | 309 | print_mce(final); |
310 | if (!apei_err) | ||
311 | apei_err = apei_write_mce(final); | ||
312 | } | ||
297 | if (cpu_missing) | 313 | if (cpu_missing) |
298 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | 314 | pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n"); |
299 | print_mce_tail(); | ||
300 | if (exp) | 315 | if (exp) |
301 | printk(KERN_EMERG "Machine check: %s\n", exp); | 316 | pr_emerg(HW_ERR "Machine check: %s\n", exp); |
302 | if (!fake_panic) { | 317 | if (!fake_panic) { |
303 | if (panic_timeout == 0) | 318 | if (panic_timeout == 0) |
304 | panic_timeout = mce_panic_timeout; | 319 | panic_timeout = mce_panic_timeout; |
305 | panic(msg); | 320 | panic(msg); |
306 | } else | 321 | } else |
307 | printk(KERN_EMERG "Fake kernel panic: %s\n", msg); | 322 | pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); |
308 | } | 323 | } |
309 | 324 | ||
310 | /* Support code for software error injection */ | 325 | /* Support code for software error injection */ |
@@ -531,7 +546,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
531 | struct mce m; | 546 | struct mce m; |
532 | int i; | 547 | int i; |
533 | 548 | ||
534 | __get_cpu_var(mce_poll_count)++; | 549 | percpu_inc(mce_poll_count); |
535 | 550 | ||
536 | mce_setup(&m); | 551 | mce_setup(&m); |
537 | 552 | ||
@@ -573,6 +588,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
573 | */ | 588 | */ |
574 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { | 589 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { |
575 | mce_log(&m); | 590 | mce_log(&m); |
591 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); | ||
576 | add_taint(TAINT_MACHINE_CHECK); | 592 | add_taint(TAINT_MACHINE_CHECK); |
577 | } | 593 | } |
578 | 594 | ||
@@ -926,7 +942,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
926 | 942 | ||
927 | atomic_inc(&mce_entry); | 943 | atomic_inc(&mce_entry); |
928 | 944 | ||
929 | __get_cpu_var(mce_exception_count)++; | 945 | percpu_inc(mce_exception_count); |
930 | 946 | ||
931 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | 947 | if (notify_die(DIE_NMI, "machine check", regs, error_code, |
932 | 18, SIGKILL) == NOTIFY_STOP) | 948 | 18, SIGKILL) == NOTIFY_STOP) |
@@ -1193,7 +1209,7 @@ int mce_notify_irq(void) | |||
1193 | schedule_work(&mce_trigger_work); | 1209 | schedule_work(&mce_trigger_work); |
1194 | 1210 | ||
1195 | if (__ratelimit(&ratelimit)) | 1211 | if (__ratelimit(&ratelimit)) |
1196 | printk(KERN_INFO "Machine check events logged\n"); | 1212 | pr_info(HW_ERR "Machine check events logged\n"); |
1197 | 1213 | ||
1198 | return 1; | 1214 | return 1; |
1199 | } | 1215 | } |
@@ -1485,7 +1501,42 @@ static void collect_tscs(void *data) | |||
1485 | rdtscll(cpu_tsc[smp_processor_id()]); | 1501 | rdtscll(cpu_tsc[smp_processor_id()]); |
1486 | } | 1502 | } |
1487 | 1503 | ||
1488 | static DEFINE_MUTEX(mce_read_mutex); | 1504 | static int mce_apei_read_done; |
1505 | |||
1506 | /* Collect MCE records of a previous boot from persistent storage via APEI ERST. */ | ||
1507 | static int __mce_read_apei(char __user **ubuf, size_t usize) | ||
1508 | { | ||
1509 | int rc; | ||
1510 | u64 record_id; | ||
1511 | struct mce m; | ||
1512 | |||
1513 | if (usize < sizeof(struct mce)) | ||
1514 | return -EINVAL; | ||
1515 | |||
1516 | rc = apei_read_mce(&m, &record_id); | ||
1517 | /* Error or no more MCE records */ | ||
1518 | if (rc <= 0) { | ||
1519 | mce_apei_read_done = 1; | ||
1520 | return rc; | ||
1521 | } | ||
1522 | rc = -EFAULT; | ||
1523 | if (copy_to_user(*ubuf, &m, sizeof(struct mce))) | ||
1524 | return rc; | ||
1525 | /* | ||
1526 | * Ideally, the record would only be cleared after it has | ||
1527 | * been flushed to disk or sent over the network by | ||
1528 | * /sbin/mcelog, but there is no interface for that yet, | ||
1529 | * so just clear it here to avoid duplication. | ||
1530 | */ | ||
1531 | rc = apei_clear_mce(record_id); | ||
1532 | if (rc) { | ||
1533 | mce_apei_read_done = 1; | ||
1534 | return rc; | ||
1535 | } | ||
1536 | *ubuf += sizeof(struct mce); | ||
1537 | |||
1538 | return 0; | ||
1539 | } | ||
1489 | 1540 | ||
1490 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | 1541 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, |
1491 | loff_t *off) | 1542 | loff_t *off) |
@@ -1500,16 +1551,20 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
1500 | return -ENOMEM; | 1551 | return -ENOMEM; |
1501 | 1552 | ||
1502 | mutex_lock(&mce_read_mutex); | 1553 | mutex_lock(&mce_read_mutex); |
1503 | next = rcu_dereference(mcelog.next); | ||
1504 | |||
1505 | /* Only supports full reads right now */ | ||
1506 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | ||
1507 | mutex_unlock(&mce_read_mutex); | ||
1508 | kfree(cpu_tsc); | ||
1509 | 1554 | ||
1510 | return -EINVAL; | 1555 | if (!mce_apei_read_done) { |
1556 | err = __mce_read_apei(&buf, usize); | ||
1557 | if (err || buf != ubuf) | ||
1558 | goto out; | ||
1511 | } | 1559 | } |
1512 | 1560 | ||
1561 | next = rcu_dereference_check_mce(mcelog.next); | ||
1562 | |||
1563 | /* Only supports full reads right now */ | ||
1564 | err = -EINVAL; | ||
1565 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) | ||
1566 | goto out; | ||
1567 | |||
1513 | err = 0; | 1568 | err = 0; |
1514 | prev = 0; | 1569 | prev = 0; |
1515 | do { | 1570 | do { |
@@ -1556,16 +1611,23 @@ timeout: | |||
1556 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | 1611 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); |
1557 | } | 1612 | } |
1558 | } | 1613 | } |
1614 | |||
1615 | if (err) | ||
1616 | err = -EFAULT; | ||
1617 | |||
1618 | out: | ||
1559 | mutex_unlock(&mce_read_mutex); | 1619 | mutex_unlock(&mce_read_mutex); |
1560 | kfree(cpu_tsc); | 1620 | kfree(cpu_tsc); |
1561 | 1621 | ||
1562 | return err ? -EFAULT : buf - ubuf; | 1622 | return err ? err : buf - ubuf; |
1563 | } | 1623 | } |
1564 | 1624 | ||
1565 | static unsigned int mce_poll(struct file *file, poll_table *wait) | 1625 | static unsigned int mce_poll(struct file *file, poll_table *wait) |
1566 | { | 1626 | { |
1567 | poll_wait(file, &mce_wait, wait); | 1627 | poll_wait(file, &mce_wait, wait); |
1568 | if (rcu_dereference(mcelog.next)) | 1628 | if (rcu_dereference_check_mce(mcelog.next)) |
1629 | return POLLIN | POLLRDNORM; | ||
1630 | if (!mce_apei_read_done && apei_check_mce()) | ||
1569 | return POLLIN | POLLRDNORM; | 1631 | return POLLIN | POLLRDNORM; |
1570 | return 0; | 1632 | return 0; |
1571 | } | 1633 | } |
@@ -2044,6 +2106,7 @@ static __init void mce_init_banks(void) | |||
2044 | struct mce_bank *b = &mce_banks[i]; | 2106 | struct mce_bank *b = &mce_banks[i]; |
2045 | struct sysdev_attribute *a = &b->attr; | 2107 | struct sysdev_attribute *a = &b->attr; |
2046 | 2108 | ||
2109 | sysfs_attr_init(&a->attr); | ||
2047 | a->attr.name = b->attrname; | 2110 | a->attr.name = b->attrname; |
2048 | snprintf(b->attrname, ATTR_LEN, "bank%d", i); | 2111 | snprintf(b->attrname, ATTR_LEN, "bank%d", i); |
2049 | 2112 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 83a3d1f4efca..39aaee5c1ab2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/sysfs.h> | 23 | #include <linux/sysfs.h> |
24 | #include <linux/slab.h> | ||
24 | #include <linux/init.h> | 25 | #include <linux/init.h> |
25 | #include <linux/cpu.h> | 26 | #include <linux/cpu.h> |
26 | #include <linux/smp.h> | 27 | #include <linux/smp.h> |
@@ -140,6 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
140 | address = (low & MASK_BLKPTR_LO) >> 21; | 141 | address = (low & MASK_BLKPTR_LO) >> 21; |
141 | if (!address) | 142 | if (!address) |
142 | break; | 143 | break; |
144 | |||
143 | address += MCG_XBLK_ADDR; | 145 | address += MCG_XBLK_ADDR; |
144 | } else | 146 | } else |
145 | ++address; | 147 | ++address; |
@@ -147,12 +149,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
147 | if (rdmsr_safe(address, &low, &high)) | 149 | if (rdmsr_safe(address, &low, &high)) |
148 | break; | 150 | break; |
149 | 151 | ||
150 | if (!(high & MASK_VALID_HI)) { | 152 | if (!(high & MASK_VALID_HI)) |
151 | if (block) | 153 | continue; |
152 | continue; | ||
153 | else | ||
154 | break; | ||
155 | } | ||
156 | 154 | ||
157 | if (!(high & MASK_CNTP_HI) || | 155 | if (!(high & MASK_CNTP_HI) || |
158 | (high & MASK_LOCKED_HI)) | 156 | (high & MASK_LOCKED_HI)) |
@@ -388,7 +386,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, | |||
388 | return ret; | 386 | return ret; |
389 | } | 387 | } |
390 | 388 | ||
391 | static struct sysfs_ops threshold_ops = { | 389 | static const struct sysfs_ops threshold_ops = { |
392 | .show = show, | 390 | .show = show, |
393 | .store = store, | 391 | .store = store, |
394 | }; | 392 | }; |
@@ -529,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
529 | err = -ENOMEM; | 527 | err = -ENOMEM; |
530 | goto out; | 528 | goto out; |
531 | } | 529 | } |
532 | if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { | 530 | if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) { |
533 | kfree(b); | 531 | kfree(b); |
534 | err = -ENOMEM; | 532 | err = -ENOMEM; |
535 | goto out; | 533 | goto out; |
@@ -542,7 +540,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
542 | #ifndef CONFIG_SMP | 540 | #ifndef CONFIG_SMP |
543 | cpumask_setall(b->cpus); | 541 | cpumask_setall(b->cpus); |
544 | #else | 542 | #else |
545 | cpumask_copy(b->cpus, c->llc_shared_map); | 543 | cpumask_set_cpu(cpu, b->cpus); |
546 | #endif | 544 | #endif |
547 | 545 | ||
548 | per_cpu(threshold_banks, cpu)[bank] = b; | 546 | per_cpu(threshold_banks, cpu)[bank] = b; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 7c785634af2b..6fcd0936194f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * Author: Andi Kleen | 5 | * Author: Andi Kleen |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/gfp.h> | ||
8 | #include <linux/init.h> | 9 | #include <linux/init.h> |
9 | #include <linux/interrupt.h> | 10 | #include <linux/interrupt.h> |
10 | #include <linux/percpu.h> | 11 | #include <linux/percpu.h> |
@@ -94,20 +95,21 @@ static void cmci_discover(int banks, int boot) | |||
94 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 95 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
95 | 96 | ||
96 | /* Already owned by someone else? */ | 97 | /* Already owned by someone else? */ |
97 | if (val & CMCI_EN) { | 98 | if (val & MCI_CTL2_CMCI_EN) { |
98 | if (test_and_clear_bit(i, owned) || boot) | 99 | if (test_and_clear_bit(i, owned) && !boot) |
99 | print_update("SHD", &hdr, i); | 100 | print_update("SHD", &hdr, i); |
100 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
101 | continue; | 102 | continue; |
102 | } | 103 | } |
103 | 104 | ||
104 | val |= CMCI_EN | CMCI_THRESHOLD; | 105 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; |
106 | val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; | ||
105 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 107 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
106 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 108 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
107 | 109 | ||
108 | /* Did the enable bit stick? -- the bank supports CMCI */ | 110 | /* Did the enable bit stick? -- the bank supports CMCI */ |
109 | if (val & CMCI_EN) { | 111 | if (val & MCI_CTL2_CMCI_EN) { |
110 | if (!test_and_set_bit(i, owned) || boot) | 112 | if (!test_and_set_bit(i, owned) && !boot) |
111 | print_update("CMCI", &hdr, i); | 113 | print_update("CMCI", &hdr, i); |
112 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 114 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
113 | } else { | 115 | } else { |
@@ -154,7 +156,7 @@ void cmci_clear(void) | |||
154 | continue; | 156 | continue; |
155 | /* Disable CMCI */ | 157 | /* Disable CMCI */ |
156 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 158 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
157 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | 159 | val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); |
158 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 160 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
159 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 161 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
160 | } | 162 | } |
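The cmci_discover() change above clears MCI_CTL2_CMCI_THRESHOLD_MASK before programming the threshold instead of only OR-ing new bits in. A small hedged sketch of why that matters follows; the mask, enable bit, and stale value are illustrative choices of mine, not taken from the architecture manual.

/* Hedged sketch: read-modify-write of a threshold field in a control value.
 * Constants below are illustrative, not the real MCI_CTL2 layout. */
#include <stdio.h>

#define CMCI_EN              (1ULL << 30)
#define CMCI_THRESHOLD_MASK  0x7fffULL
#define CMCI_THRESHOLD       1ULL

int main(void)
{
        unsigned long long val = 0x55ULL;    /* stale threshold left behind */

        /* OR-only approach: stale threshold bits survive */
        unsigned long long buggy = val | CMCI_EN | CMCI_THRESHOLD;

        /* clear the field first, then program it */
        unsigned long long fixed = (val & ~CMCI_THRESHOLD_MASK)
                                   | CMCI_EN | CMCI_THRESHOLD;

        printf("without clearing: threshold = %llu\n",
               buggy & CMCI_THRESHOLD_MASK);
        printf("with clearing:    threshold = %llu\n",
               fixed & CMCI_THRESHOLD_MASK);
        return 0;
}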
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 81c499eceb21..169d8804a9f8 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -34,15 +34,25 @@ | |||
34 | /* How long to wait between reporting thermal events */ | 34 | /* How long to wait between reporting thermal events */ |
35 | #define CHECK_INTERVAL (300 * HZ) | 35 | #define CHECK_INTERVAL (300 * HZ) |
36 | 36 | ||
37 | #define THERMAL_THROTTLING_EVENT 0 | ||
38 | #define POWER_LIMIT_EVENT 1 | ||
39 | |||
37 | /* | 40 | /* |
38 | * Current thermal throttling state: | 41 | * Current thermal event state: |
39 | */ | 42 | */ |
40 | struct thermal_state { | 43 | struct _thermal_state { |
41 | bool is_throttled; | 44 | bool new_event; |
42 | 45 | int event; | |
43 | u64 next_check; | 46 | u64 next_check; |
44 | unsigned long throttle_count; | 47 | unsigned long count; |
45 | unsigned long last_throttle_count; | 48 | unsigned long last_count; |
49 | }; | ||
50 | |||
51 | struct thermal_state { | ||
52 | struct _thermal_state core_throttle; | ||
53 | struct _thermal_state core_power_limit; | ||
54 | struct _thermal_state package_throttle; | ||
55 | struct _thermal_state package_power_limit; | ||
46 | }; | 56 | }; |
47 | 57 | ||
48 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); | 58 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); |
@@ -53,11 +63,13 @@ static u32 lvtthmr_init __read_mostly; | |||
53 | 63 | ||
54 | #ifdef CONFIG_SYSFS | 64 | #ifdef CONFIG_SYSFS |
55 | #define define_therm_throt_sysdev_one_ro(_name) \ | 65 | #define define_therm_throt_sysdev_one_ro(_name) \ |
56 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 66 | static SYSDEV_ATTR(_name, 0444, \ |
67 | therm_throt_sysdev_show_##_name, \ | ||
68 | NULL) \ | ||
57 | 69 | ||
58 | #define define_therm_throt_sysdev_show_func(name) \ | 70 | #define define_therm_throt_sysdev_show_func(event, name) \ |
59 | \ | 71 | \ |
60 | static ssize_t therm_throt_sysdev_show_##name( \ | 72 | static ssize_t therm_throt_sysdev_show_##event##_##name( \ |
61 | struct sys_device *dev, \ | 73 | struct sys_device *dev, \ |
62 | struct sysdev_attribute *attr, \ | 74 | struct sysdev_attribute *attr, \ |
63 | char *buf) \ | 75 | char *buf) \ |
@@ -66,30 +78,42 @@ static ssize_t therm_throt_sysdev_show_##name( \ | |||
66 | ssize_t ret; \ | 78 | ssize_t ret; \ |
67 | \ | 79 | \ |
68 | preempt_disable(); /* CPU hotplug */ \ | 80 | preempt_disable(); /* CPU hotplug */ \ |
69 | if (cpu_online(cpu)) \ | 81 | if (cpu_online(cpu)) { \ |
70 | ret = sprintf(buf, "%lu\n", \ | 82 | ret = sprintf(buf, "%lu\n", \ |
71 | per_cpu(thermal_state, cpu).name); \ | 83 | per_cpu(thermal_state, cpu).event.name); \ |
72 | else \ | 84 | } else \ |
73 | ret = 0; \ | 85 | ret = 0; \ |
74 | preempt_enable(); \ | 86 | preempt_enable(); \ |
75 | \ | 87 | \ |
76 | return ret; \ | 88 | return ret; \ |
77 | } | 89 | } |
78 | 90 | ||
79 | define_therm_throt_sysdev_show_func(throttle_count); | 91 | define_therm_throt_sysdev_show_func(core_throttle, count); |
80 | define_therm_throt_sysdev_one_ro(throttle_count); | 92 | define_therm_throt_sysdev_one_ro(core_throttle_count); |
93 | |||
94 | define_therm_throt_sysdev_show_func(core_power_limit, count); | ||
95 | define_therm_throt_sysdev_one_ro(core_power_limit_count); | ||
96 | |||
97 | define_therm_throt_sysdev_show_func(package_throttle, count); | ||
98 | define_therm_throt_sysdev_one_ro(package_throttle_count); | ||
99 | |||
100 | define_therm_throt_sysdev_show_func(package_power_limit, count); | ||
101 | define_therm_throt_sysdev_one_ro(package_power_limit_count); | ||
81 | 102 | ||
82 | static struct attribute *thermal_throttle_attrs[] = { | 103 | static struct attribute *thermal_throttle_attrs[] = { |
83 | &attr_throttle_count.attr, | 104 | &attr_core_throttle_count.attr, |
84 | NULL | 105 | NULL |
85 | }; | 106 | }; |
86 | 107 | ||
87 | static struct attribute_group thermal_throttle_attr_group = { | 108 | static struct attribute_group thermal_attr_group = { |
88 | .attrs = thermal_throttle_attrs, | 109 | .attrs = thermal_throttle_attrs, |
89 | .name = "thermal_throttle" | 110 | .name = "thermal_throttle" |
90 | }; | 111 | }; |
91 | #endif /* CONFIG_SYSFS */ | 112 | #endif /* CONFIG_SYSFS */ |
92 | 113 | ||
114 | #define CORE_LEVEL 0 | ||
115 | #define PACKAGE_LEVEL 1 | ||
116 | |||
93 | /*** | 117 | /*** |
94 | * therm_throt_process - Process thermal throttling event from interrupt | 118 | * therm_throt_process - Process thermal throttling event from interrupt |
95 | * @curr: Whether the condition is current or not (boolean), since the | 119 | * @curr: Whether the condition is current or not (boolean), since the |
@@ -106,39 +130,70 @@ static struct attribute_group thermal_throttle_attr_group = { | |||
106 | * 1 : Event should be logged further, and a message has been | 130 | * 1 : Event should be logged further, and a message has been |
107 | * printed to the syslog. | 131 | * printed to the syslog. |
108 | */ | 132 | */ |
109 | static int therm_throt_process(bool is_throttled) | 133 | static int therm_throt_process(bool new_event, int event, int level) |
110 | { | 134 | { |
111 | struct thermal_state *state; | 135 | struct _thermal_state *state; |
112 | unsigned int this_cpu; | 136 | unsigned int this_cpu = smp_processor_id(); |
113 | bool was_throttled; | 137 | bool old_event; |
114 | u64 now; | 138 | u64 now; |
139 | struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); | ||
115 | 140 | ||
116 | this_cpu = smp_processor_id(); | ||
117 | now = get_jiffies_64(); | 141 | now = get_jiffies_64(); |
118 | state = &per_cpu(thermal_state, this_cpu); | 142 | if (level == CORE_LEVEL) { |
143 | if (event == THERMAL_THROTTLING_EVENT) | ||
144 | state = &pstate->core_throttle; | ||
145 | else if (event == POWER_LIMIT_EVENT) | ||
146 | state = &pstate->core_power_limit; | ||
147 | else | ||
148 | return 0; | ||
149 | } else if (level == PACKAGE_LEVEL) { | ||
150 | if (event == THERMAL_THROTTLING_EVENT) | ||
151 | state = &pstate->package_throttle; | ||
152 | else if (event == POWER_LIMIT_EVENT) | ||
153 | state = &pstate->package_power_limit; | ||
154 | else | ||
155 | return 0; | ||
156 | } else | ||
157 | return 0; | ||
119 | 158 | ||
120 | was_throttled = state->is_throttled; | 159 | old_event = state->new_event; |
121 | state->is_throttled = is_throttled; | 160 | state->new_event = new_event; |
122 | 161 | ||
123 | if (is_throttled) | 162 | if (new_event) |
124 | state->throttle_count++; | 163 | state->count++; |
125 | 164 | ||
126 | if (time_before64(now, state->next_check) && | 165 | if (time_before64(now, state->next_check) && |
127 | state->throttle_count != state->last_throttle_count) | 166 | state->count != state->last_count) |
128 | return 0; | 167 | return 0; |
129 | 168 | ||
130 | state->next_check = now + CHECK_INTERVAL; | 169 | state->next_check = now + CHECK_INTERVAL; |
131 | state->last_throttle_count = state->throttle_count; | 170 | state->last_count = state->count; |
132 | 171 | ||
133 | /* if we just entered the thermal event */ | 172 | /* if we just entered the thermal event */ |
134 | if (is_throttled) { | 173 | if (new_event) { |
135 | printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count); | 174 | if (event == THERMAL_THROTTLING_EVENT) |
175 | printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", | ||
176 | this_cpu, | ||
177 | level == CORE_LEVEL ? "Core" : "Package", | ||
178 | state->count); | ||
179 | else | ||
180 | printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n", | ||
181 | this_cpu, | ||
182 | level == CORE_LEVEL ? "Core" : "Package", | ||
183 | state->count); | ||
136 | 184 | ||
137 | add_taint(TAINT_MACHINE_CHECK); | 185 | add_taint(TAINT_MACHINE_CHECK); |
138 | return 1; | 186 | return 1; |
139 | } | 187 | } |
140 | if (was_throttled) { | 188 | if (old_event) { |
141 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu); | 189 | if (event == THERMAL_THROTTLING_EVENT) |
190 | printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", | ||
191 | this_cpu, | ||
192 | level == CORE_LEVEL ? "Core" : "Package"); | ||
193 | else | ||
194 | printk(KERN_INFO "CPU%d: %s power limit normal\n", | ||
195 | this_cpu, | ||
196 | level == CORE_LEVEL ? "Core" : "Package"); | ||
142 | return 1; | 197 | return 1; |
143 | } | 198 | } |
144 | 199 | ||
@@ -147,15 +202,36 @@ static int therm_throt_process(bool is_throttled) | |||
147 | 202 | ||
148 | #ifdef CONFIG_SYSFS | 203 | #ifdef CONFIG_SYSFS |
149 | /* Add/Remove thermal_throttle interface for CPU device: */ | 204 | /* Add/Remove thermal_throttle interface for CPU device: */ |
150 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) | 205 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, |
206 | unsigned int cpu) | ||
151 | { | 207 | { |
152 | return sysfs_create_group(&sys_dev->kobj, | 208 | int err; |
153 | &thermal_throttle_attr_group); | 209 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
210 | |||
211 | err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); | ||
212 | if (err) | ||
213 | return err; | ||
214 | |||
215 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
216 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
217 | &attr_core_power_limit_count.attr, | ||
218 | thermal_attr_group.name); | ||
219 | if (cpu_has(c, X86_FEATURE_PTS)) { | ||
220 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
221 | &attr_package_throttle_count.attr, | ||
222 | thermal_attr_group.name); | ||
223 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
224 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
225 | &attr_package_power_limit_count.attr, | ||
226 | thermal_attr_group.name); | ||
227 | } | ||
228 | |||
229 | return err; | ||
154 | } | 230 | } |
155 | 231 | ||
156 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | 232 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) |
157 | { | 233 | { |
158 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); | 234 | sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group); |
159 | } | 235 | } |
160 | 236 | ||
161 | /* Mutex protecting device creation against CPU hotplug: */ | 237 | /* Mutex protecting device creation against CPU hotplug: */ |
@@ -177,7 +253,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
177 | case CPU_UP_PREPARE: | 253 | case CPU_UP_PREPARE: |
178 | case CPU_UP_PREPARE_FROZEN: | 254 | case CPU_UP_PREPARE_FROZEN: |
179 | mutex_lock(&therm_cpu_lock); | 255 | mutex_lock(&therm_cpu_lock); |
180 | err = thermal_throttle_add_dev(sys_dev); | 256 | err = thermal_throttle_add_dev(sys_dev, cpu); |
181 | mutex_unlock(&therm_cpu_lock); | 257 | mutex_unlock(&therm_cpu_lock); |
182 | WARN_ON(err); | 258 | WARN_ON(err); |
183 | break; | 259 | break; |
@@ -190,7 +266,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
190 | mutex_unlock(&therm_cpu_lock); | 266 | mutex_unlock(&therm_cpu_lock); |
191 | break; | 267 | break; |
192 | } | 268 | } |
193 | return err ? NOTIFY_BAD : NOTIFY_OK; | 269 | return notifier_from_errno(err); |
194 | } | 270 | } |
195 | 271 | ||
196 | static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = | 272 | static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = |
@@ -213,7 +289,7 @@ static __init int thermal_throttle_init_device(void) | |||
213 | #endif | 289 | #endif |
214 | /* connect live CPUs to sysfs */ | 290 | /* connect live CPUs to sysfs */ |
215 | for_each_online_cpu(cpu) { | 291 | for_each_online_cpu(cpu) { |
216 | err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); | 292 | err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu); |
217 | WARN_ON(err); | 293 | WARN_ON(err); |
218 | } | 294 | } |
219 | #ifdef CONFIG_HOTPLUG_CPU | 295 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -226,14 +302,50 @@ device_initcall(thermal_throttle_init_device); | |||
226 | 302 | ||
227 | #endif /* CONFIG_SYSFS */ | 303 | #endif /* CONFIG_SYSFS */ |
228 | 304 | ||
305 | /* | ||
306 | * Set the two most significant bits to tell the mce log which thermal | ||
307 | * event type this is. | ||
308 | * This is a temporary solution and may be changed in the future along | ||
309 | * with the mce log infrastructure. | ||
310 | */ | ||
311 | #define CORE_THROTTLED (0) | ||
312 | #define CORE_POWER_LIMIT ((__u64)1 << 62) | ||
313 | #define PACKAGE_THROTTLED ((__u64)2 << 62) | ||
314 | #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) | ||
315 | |||
229 | /* Thermal transition interrupt handler */ | 316 | /* Thermal transition interrupt handler */ |
230 | static void intel_thermal_interrupt(void) | 317 | static void intel_thermal_interrupt(void) |
231 | { | 318 | { |
232 | __u64 msr_val; | 319 | __u64 msr_val; |
320 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | ||
233 | 321 | ||
234 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 322 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
235 | if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0)) | 323 | |
236 | mce_log_therm_throt_event(msr_val); | 324 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, |
325 | THERMAL_THROTTLING_EVENT, | ||
326 | CORE_LEVEL) != 0) | ||
327 | mce_log_therm_throt_event(CORE_THROTTLED | msr_val); | ||
328 | |||
329 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
330 | if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, | ||
331 | POWER_LIMIT_EVENT, | ||
332 | CORE_LEVEL) != 0) | ||
333 | mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); | ||
334 | |||
335 | if (cpu_has(c, X86_FEATURE_PTS)) { | ||
336 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | ||
337 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, | ||
338 | THERMAL_THROTTLING_EVENT, | ||
339 | PACKAGE_LEVEL) != 0) | ||
340 | mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); | ||
341 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
342 | if (therm_throt_process(msr_val & | ||
343 | PACKAGE_THERM_STATUS_POWER_LIMIT, | ||
344 | POWER_LIMIT_EVENT, | ||
345 | PACKAGE_LEVEL) != 0) | ||
346 | mce_log_therm_throt_event(PACKAGE_POWER_LIMIT | ||
347 | | msr_val); | ||
348 | } | ||
237 | } | 349 | } |
238 | 350 | ||
239 | static void unexpected_thermal_interrupt(void) | 351 | static void unexpected_thermal_interrupt(void) |
@@ -335,8 +447,26 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
335 | apic_write(APIC_LVTTHMR, h); | 447 | apic_write(APIC_LVTTHMR, h); |
336 | 448 | ||
337 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | 449 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); |
338 | wrmsr(MSR_IA32_THERM_INTERRUPT, | 450 | if (cpu_has(c, X86_FEATURE_PLN)) |
339 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | 451 | wrmsr(MSR_IA32_THERM_INTERRUPT, |
452 | l | (THERM_INT_LOW_ENABLE | ||
453 | | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h); | ||
454 | else | ||
455 | wrmsr(MSR_IA32_THERM_INTERRUPT, | ||
456 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | ||
457 | |||
458 | if (cpu_has(c, X86_FEATURE_PTS)) { | ||
459 | rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | ||
460 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
461 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
462 | l | (PACKAGE_THERM_INT_LOW_ENABLE | ||
463 | | PACKAGE_THERM_INT_HIGH_ENABLE | ||
464 | | PACKAGE_THERM_INT_PLN_ENABLE), h); | ||
465 | else | ||
466 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
467 | l | (PACKAGE_THERM_INT_LOW_ENABLE | ||
468 | | PACKAGE_THERM_INT_HIGH_ENABLE), h); | ||
469 | } | ||
340 | 470 | ||
341 | smp_thermal_vector = intel_thermal_interrupt; | 471 | smp_thermal_vector = intel_thermal_interrupt; |
342 | 472 | ||
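The thermal interrupt handler above packs the event type into the two most significant bits of the value handed to mce_log_therm_throt_event(). A hedged decoding sketch is shown below; the constants mirror the #defines in the hunk, while the sample input value is made up.

/* Hedged sketch: decode the thermal event type from the top two bits of a
 * logged value. Sample input is invented for illustration. */
#include <stdio.h>

#define CORE_THROTTLED        (0ULL)
#define CORE_POWER_LIMIT      (1ULL << 62)
#define PACKAGE_THROTTLED     (2ULL << 62)
#define PACKAGE_POWER_LIMIT   (3ULL << 62)

static const char *event_name(unsigned long long logged)
{
        switch (logged >> 62) {
        case 0: return "core throttled";
        case 1: return "core power limit";
        case 2: return "package throttled";
        case 3: return "package power limit";
        }
        return "unknown";
}

int main(void)
{
        unsigned long long logged = PACKAGE_POWER_LIMIT | 0x1ULL; /* example */

        printf("event: %s, raw status bits: %#llx\n",
               event_name(logged), logged & ~(3ULL << 62));
        return 0;
}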
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c new file mode 100644 index 000000000000..d944bf6c50e9 --- /dev/null +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
@@ -0,0 +1,56 @@ | |||
1 | /* | ||
2 | * HyperV Detection code. | ||
3 | * | ||
4 | * Copyright (C) 2010, Novell, Inc. | ||
5 | * Author : K. Y. Srinivasan <ksrinivasan@novell.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; version 2 of the License. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <asm/processor.h> | ||
16 | #include <asm/hypervisor.h> | ||
17 | #include <asm/hyperv.h> | ||
18 | #include <asm/mshyperv.h> | ||
19 | |||
20 | struct ms_hyperv_info ms_hyperv; | ||
21 | EXPORT_SYMBOL_GPL(ms_hyperv); | ||
22 | |||
23 | static bool __init ms_hyperv_platform(void) | ||
24 | { | ||
25 | u32 eax; | ||
26 | u32 hyp_signature[3]; | ||
27 | |||
28 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) | ||
29 | return false; | ||
30 | |||
31 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, | ||
32 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); | ||
33 | |||
34 | return eax >= HYPERV_CPUID_MIN && | ||
35 | eax <= HYPERV_CPUID_MAX && | ||
36 | !memcmp("Microsoft Hv", hyp_signature, 12); | ||
37 | } | ||
38 | |||
39 | static void __init ms_hyperv_init_platform(void) | ||
40 | { | ||
41 | /* | ||
42 | * Extract the features and hints | ||
43 | */ | ||
44 | ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); | ||
45 | ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); | ||
46 | |||
47 | printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", | ||
48 | ms_hyperv.features, ms_hyperv.hints); | ||
49 | } | ||
50 | |||
51 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | ||
52 | .name = "Microsoft HyperV", | ||
53 | .detect = ms_hyperv_platform, | ||
54 | .init_platform = ms_hyperv_init_platform, | ||
55 | }; | ||
56 | EXPORT_SYMBOL(x86_hyper_ms_hyperv); | ||
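For comparison, the same signature check can be sketched from user space with GCC's <cpuid.h>; this is only a hedged approximation of ms_hyperv_platform(), and the 0x40000000 leaf is my assumption for HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS. Outside a hypervisor the leaf returns zeros and the comparison simply fails.

/* Hedged sketch: user-space Hyper-V signature probe via CPUID. */
#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        unsigned int eax, sig[3];

        /* Hypervisor vendor leaf; assumed to match
         * HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS. */
        __cpuid(0x40000000, eax, sig[0], sig[1], sig[2]);

        if (!memcmp("Microsoft Hv", sig, 12))
                printf("Hyper-V signature found, max hypervisor leaf %#x\n", eax);
        else
                printf("not running on Hyper-V\n");
        return 0;
}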
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile index f4361b56f8e9..ad9e5ed81181 100644 --- a/arch/x86/kernel/cpu/mtrr/Makefile +++ b/arch/x86/kernel/cpu/mtrr/Makefile | |||
@@ -1,3 +1,3 @@ | |||
1 | obj-y := main.o if.o generic.o state.o cleanup.o | 1 | obj-y := main.o if.o generic.o cleanup.o |
2 | obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o | 2 | obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o |
3 | 3 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index 33af14110dfd..92ba9cd31c9a 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c | |||
@@ -108,7 +108,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) | |||
108 | return 0; | 108 | return 0; |
109 | } | 109 | } |
110 | 110 | ||
111 | static struct mtrr_ops amd_mtrr_ops = { | 111 | static const struct mtrr_ops amd_mtrr_ops = { |
112 | .vendor = X86_VENDOR_AMD, | 112 | .vendor = X86_VENDOR_AMD, |
113 | .set = amd_set_mtrr, | 113 | .set = amd_set_mtrr, |
114 | .get = amd_get_mtrr, | 114 | .get = amd_get_mtrr, |
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index de89f14eff3a..316fe3e60a97 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c | |||
@@ -110,7 +110,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t | |||
110 | return 0; | 110 | return 0; |
111 | } | 111 | } |
112 | 112 | ||
113 | static struct mtrr_ops centaur_mtrr_ops = { | 113 | static const struct mtrr_ops centaur_mtrr_ops = { |
114 | .vendor = X86_VENDOR_CENTAUR, | 114 | .vendor = X86_VENDOR_CENTAUR, |
115 | .set = centaur_set_mcr, | 115 | .set = centaur_set_mcr, |
116 | .get = centaur_get_mcr, | 116 | .get = centaur_get_mcr, |
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 09b1698e0466..c5f59d071425 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -22,10 +22,10 @@ | |||
22 | #include <linux/pci.h> | 22 | #include <linux/pci.h> |
23 | #include <linux/smp.h> | 23 | #include <linux/smp.h> |
24 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
25 | #include <linux/sort.h> | ||
26 | #include <linux/mutex.h> | 25 | #include <linux/mutex.h> |
27 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
28 | #include <linux/kvm_para.h> | 27 | #include <linux/kvm_para.h> |
28 | #include <linux/range.h> | ||
29 | 29 | ||
30 | #include <asm/processor.h> | 30 | #include <asm/processor.h> |
31 | #include <asm/e820.h> | 31 | #include <asm/e820.h> |
@@ -34,11 +34,6 @@ | |||
34 | 34 | ||
35 | #include "mtrr.h" | 35 | #include "mtrr.h" |
36 | 36 | ||
37 | struct res_range { | ||
38 | unsigned long start; | ||
39 | unsigned long end; | ||
40 | }; | ||
41 | |||
42 | struct var_mtrr_range_state { | 37 | struct var_mtrr_range_state { |
43 | unsigned long base_pfn; | 38 | unsigned long base_pfn; |
44 | unsigned long size_pfn; | 39 | unsigned long size_pfn; |
@@ -56,7 +51,7 @@ struct var_mtrr_state { | |||
56 | /* Should be related to MTRR_VAR_RANGES nums */ | 51 | /* Should be related to MTRR_VAR_RANGES nums */ |
57 | #define RANGE_NUM 256 | 52 | #define RANGE_NUM 256 |
58 | 53 | ||
59 | static struct res_range __initdata range[RANGE_NUM]; | 54 | static struct range __initdata range[RANGE_NUM]; |
60 | static int __initdata nr_range; | 55 | static int __initdata nr_range; |
61 | 56 | ||
62 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | 57 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; |
@@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | |||
64 | static int __initdata debug_print; | 59 | static int __initdata debug_print; |
65 | #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) | 60 | #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) |
66 | 61 | ||
67 | |||
68 | static int __init | ||
69 | add_range(struct res_range *range, int nr_range, | ||
70 | unsigned long start, unsigned long end) | ||
71 | { | ||
72 | /* Out of slots: */ | ||
73 | if (nr_range >= RANGE_NUM) | ||
74 | return nr_range; | ||
75 | |||
76 | range[nr_range].start = start; | ||
77 | range[nr_range].end = end; | ||
78 | |||
79 | nr_range++; | ||
80 | |||
81 | return nr_range; | ||
82 | } | ||
83 | |||
84 | static int __init | ||
85 | add_range_with_merge(struct res_range *range, int nr_range, | ||
86 | unsigned long start, unsigned long end) | ||
87 | { | ||
88 | int i; | ||
89 | |||
90 | /* Try to merge it with old one: */ | ||
91 | for (i = 0; i < nr_range; i++) { | ||
92 | unsigned long final_start, final_end; | ||
93 | unsigned long common_start, common_end; | ||
94 | |||
95 | if (!range[i].end) | ||
96 | continue; | ||
97 | |||
98 | common_start = max(range[i].start, start); | ||
99 | common_end = min(range[i].end, end); | ||
100 | if (common_start > common_end + 1) | ||
101 | continue; | ||
102 | |||
103 | final_start = min(range[i].start, start); | ||
104 | final_end = max(range[i].end, end); | ||
105 | |||
106 | range[i].start = final_start; | ||
107 | range[i].end = final_end; | ||
108 | return nr_range; | ||
109 | } | ||
110 | |||
111 | /* Need to add it: */ | ||
112 | return add_range(range, nr_range, start, end); | ||
113 | } | ||
114 | |||
115 | static void __init | ||
116 | subtract_range(struct res_range *range, unsigned long start, unsigned long end) | ||
117 | { | ||
118 | int i, j; | ||
119 | |||
120 | for (j = 0; j < RANGE_NUM; j++) { | ||
121 | if (!range[j].end) | ||
122 | continue; | ||
123 | |||
124 | if (start <= range[j].start && end >= range[j].end) { | ||
125 | range[j].start = 0; | ||
126 | range[j].end = 0; | ||
127 | continue; | ||
128 | } | ||
129 | |||
130 | if (start <= range[j].start && end < range[j].end && | ||
131 | range[j].start < end + 1) { | ||
132 | range[j].start = end + 1; | ||
133 | continue; | ||
134 | } | ||
135 | |||
136 | |||
137 | if (start > range[j].start && end >= range[j].end && | ||
138 | range[j].end > start - 1) { | ||
139 | range[j].end = start - 1; | ||
140 | continue; | ||
141 | } | ||
142 | |||
143 | if (start > range[j].start && end < range[j].end) { | ||
144 | /* Find the new spare: */ | ||
145 | for (i = 0; i < RANGE_NUM; i++) { | ||
146 | if (range[i].end == 0) | ||
147 | break; | ||
148 | } | ||
149 | if (i < RANGE_NUM) { | ||
150 | range[i].end = range[j].end; | ||
151 | range[i].start = end + 1; | ||
152 | } else { | ||
153 | printk(KERN_ERR "run out of slots in ranges\n"); | ||
154 | } | ||
155 | range[j].end = start - 1; | ||
156 | continue; | ||
157 | } | ||
158 | } | ||
159 | } | ||
160 | |||
161 | static int __init cmp_range(const void *x1, const void *x2) | ||
162 | { | ||
163 | const struct res_range *r1 = x1; | ||
164 | const struct res_range *r2 = x2; | ||
165 | long start1, start2; | ||
166 | |||
167 | start1 = r1->start; | ||
168 | start2 = r2->start; | ||
169 | |||
170 | return start1 - start2; | ||
171 | } | ||
172 | |||
173 | static int __init clean_sort_range(struct res_range *range, int az) | ||
174 | { | ||
175 | int i, j, k = az - 1, nr_range = 0; | ||
176 | |||
177 | for (i = 0; i < k; i++) { | ||
178 | if (range[i].end) | ||
179 | continue; | ||
180 | for (j = k; j > i; j--) { | ||
181 | if (range[j].end) { | ||
182 | k = j; | ||
183 | break; | ||
184 | } | ||
185 | } | ||
186 | if (j == i) | ||
187 | break; | ||
188 | range[i].start = range[k].start; | ||
189 | range[i].end = range[k].end; | ||
190 | range[k].start = 0; | ||
191 | range[k].end = 0; | ||
192 | k--; | ||
193 | } | ||
194 | /* count it */ | ||
195 | for (i = 0; i < az; i++) { | ||
196 | if (!range[i].end) { | ||
197 | nr_range = i; | ||
198 | break; | ||
199 | } | ||
200 | } | ||
201 | |||
202 | /* sort them */ | ||
203 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
204 | |||
205 | return nr_range; | ||
206 | } | ||
207 | |||
208 | #define BIOS_BUG_MSG KERN_WARNING \ | 62 | #define BIOS_BUG_MSG KERN_WARNING \ |
209 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" | 63 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" |
210 | 64 | ||
211 | static int __init | 65 | static int __init |
212 | x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | 66 | x86_get_mtrr_mem_range(struct range *range, int nr_range, |
213 | unsigned long extra_remove_base, | 67 | unsigned long extra_remove_base, |
214 | unsigned long extra_remove_size) | 68 | unsigned long extra_remove_size) |
215 | { | 69 | { |
@@ -223,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
223 | continue; | 77 | continue; |
224 | base = range_state[i].base_pfn; | 78 | base = range_state[i].base_pfn; |
225 | size = range_state[i].size_pfn; | 79 | size = range_state[i].size_pfn; |
226 | nr_range = add_range_with_merge(range, nr_range, base, | 80 | nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, |
227 | base + size - 1); | 81 | base, base + size); |
228 | } | 82 | } |
229 | if (debug_print) { | 83 | if (debug_print) { |
230 | printk(KERN_DEBUG "After WB checking\n"); | 84 | printk(KERN_DEBUG "After WB checking\n"); |
231 | for (i = 0; i < nr_range; i++) | 85 | for (i = 0; i < nr_range; i++) |
232 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 86 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
233 | range[i].start, range[i].end + 1); | 87 | range[i].start, range[i].end); |
234 | } | 88 | } |
235 | 89 | ||
236 | /* Take out UC ranges: */ | 90 | /* Take out UC ranges: */ |
@@ -252,19 +106,19 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
252 | size -= (1<<(20-PAGE_SHIFT)) - base; | 106 | size -= (1<<(20-PAGE_SHIFT)) - base; |
253 | base = 1<<(20-PAGE_SHIFT); | 107 | base = 1<<(20-PAGE_SHIFT); |
254 | } | 108 | } |
255 | subtract_range(range, base, base + size - 1); | 109 | subtract_range(range, RANGE_NUM, base, base + size); |
256 | } | 110 | } |
257 | if (extra_remove_size) | 111 | if (extra_remove_size) |
258 | subtract_range(range, extra_remove_base, | 112 | subtract_range(range, RANGE_NUM, extra_remove_base, |
259 | extra_remove_base + extra_remove_size - 1); | 113 | extra_remove_base + extra_remove_size); |
260 | 114 | ||
261 | if (debug_print) { | 115 | if (debug_print) { |
262 | printk(KERN_DEBUG "After UC checking\n"); | 116 | printk(KERN_DEBUG "After UC checking\n"); |
263 | for (i = 0; i < RANGE_NUM; i++) { | 117 | for (i = 0; i < RANGE_NUM; i++) { |
264 | if (!range[i].end) | 118 | if (!range[i].end) |
265 | continue; | 119 | continue; |
266 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 120 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
267 | range[i].start, range[i].end + 1); | 121 | range[i].start, range[i].end); |
268 | } | 122 | } |
269 | } | 123 | } |
270 | 124 | ||
@@ -273,26 +127,22 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
273 | if (debug_print) { | 127 | if (debug_print) { |
274 | printk(KERN_DEBUG "After sorting\n"); | 128 | printk(KERN_DEBUG "After sorting\n"); |
275 | for (i = 0; i < nr_range; i++) | 129 | for (i = 0; i < nr_range; i++) |
276 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 130 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
277 | range[i].start, range[i].end + 1); | 131 | range[i].start, range[i].end); |
278 | } | 132 | } |
279 | 133 | ||
280 | /* clear those that are not used */ | ||
281 | for (i = nr_range; i < RANGE_NUM; i++) | ||
282 | memset(&range[i], 0, sizeof(range[i])); | ||
283 | |||
284 | return nr_range; | 134 | return nr_range; |
285 | } | 135 | } |
286 | 136 | ||
287 | #ifdef CONFIG_MTRR_SANITIZER | 137 | #ifdef CONFIG_MTRR_SANITIZER |
288 | 138 | ||
289 | static unsigned long __init sum_ranges(struct res_range *range, int nr_range) | 139 | static unsigned long __init sum_ranges(struct range *range, int nr_range) |
290 | { | 140 | { |
291 | unsigned long sum = 0; | 141 | unsigned long sum = 0; |
292 | int i; | 142 | int i; |
293 | 143 | ||
294 | for (i = 0; i < nr_range; i++) | 144 | for (i = 0; i < nr_range; i++) |
295 | sum += range[i].end + 1 - range[i].start; | 145 | sum += range[i].end - range[i].start; |
296 | 146 | ||
297 | return sum; | 147 | return sum; |
298 | } | 148 | } |
@@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg) | |||
621 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); | 471 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); |
622 | 472 | ||
623 | static int __init | 473 | static int __init |
624 | x86_setup_var_mtrrs(struct res_range *range, int nr_range, | 474 | x86_setup_var_mtrrs(struct range *range, int nr_range, |
625 | u64 chunk_size, u64 gran_size) | 475 | u64 chunk_size, u64 gran_size) |
626 | { | 476 | { |
627 | struct var_mtrr_state var_state; | 477 | struct var_mtrr_state var_state; |
@@ -639,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, | |||
639 | /* Write the range: */ | 489 | /* Write the range: */ |
640 | for (i = 0; i < nr_range; i++) { | 490 | for (i = 0; i < nr_range; i++) { |
641 | set_var_mtrr_range(&var_state, range[i].start, | 491 | set_var_mtrr_range(&var_state, range[i].start, |
642 | range[i].end - range[i].start + 1); | 492 | range[i].end - range[i].start); |
643 | } | 493 | } |
644 | 494 | ||
645 | /* Write the last range: */ | 495 | /* Write the last range: */ |
@@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size, | |||
742 | unsigned long x_remove_base, | 592 | unsigned long x_remove_base, |
743 | unsigned long x_remove_size, int i) | 593 | unsigned long x_remove_size, int i) |
744 | { | 594 | { |
745 | static struct res_range range_new[RANGE_NUM]; | 595 | static struct range range_new[RANGE_NUM]; |
746 | unsigned long range_sums_new; | 596 | unsigned long range_sums_new; |
747 | static int nr_range_new; | 597 | static int nr_range_new; |
748 | int num_reg; | 598 | int num_reg; |
@@ -782,9 +632,9 @@ static void __init mtrr_print_out_one_result(int i) | |||
782 | unsigned long gran_base, chunk_base, lose_base; | 632 | unsigned long gran_base, chunk_base, lose_base; |
783 | char gran_factor, chunk_factor, lose_factor; | 633 | char gran_factor, chunk_factor, lose_factor; |
784 | 634 | ||
785 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | 635 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor); |
786 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | 636 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor); |
787 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | 637 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor); |
788 | 638 | ||
789 | pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", | 639 | pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", |
790 | result[i].bad ? "*BAD*" : " ", | 640 | result[i].bad ? "*BAD*" : " ", |
@@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
869 | * [0, 1M) should always be covered by var mtrr with WB | 719 | * [0, 1M) should always be covered by var mtrr with WB |
870 | * and fixed mtrrs should take effect before var mtrr for it: | 720 | * and fixed mtrrs should take effect before var mtrr for it: |
871 | */ | 721 | */ |
872 | nr_range = add_range_with_merge(range, nr_range, 0, | 722 | nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0, |
873 | (1ULL<<(20 - PAGE_SHIFT)) - 1); | 723 | 1ULL<<(20 - PAGE_SHIFT)); |
874 | /* Sort the ranges: */ | 724 | /* Sort the ranges: */ |
875 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | 725 | sort_range(range, nr_range); |
876 | 726 | ||
877 | range_sums = sum_ranges(range, nr_range); | 727 | range_sums = sum_ranges(range, nr_range); |
878 | printk(KERN_INFO "total RAM covered: %ldM\n", | 728 | printk(KERN_INFO "total RAM covered: %ldM\n", |
@@ -1089,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1089 | nr_range = 0; | 939 | nr_range = 0; |
1090 | if (mtrr_tom2) { | 940 | if (mtrr_tom2) { |
1091 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); | 941 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); |
1092 | range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; | 942 | range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT; |
1093 | if (highest_pfn < range[nr_range].end + 1) | 943 | if (highest_pfn < range[nr_range].end) |
1094 | highest_pfn = range[nr_range].end + 1; | 944 | highest_pfn = range[nr_range].end; |
1095 | nr_range++; | 945 | nr_range++; |
1096 | } | 946 | } |
1097 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); | 947 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); |
@@ -1103,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1103 | 953 | ||
1104 | /* Check the holes: */ | 954 | /* Check the holes: */ |
1105 | for (i = 0; i < nr_range - 1; i++) { | 955 | for (i = 0; i < nr_range - 1; i++) { |
1106 | if (range[i].end + 1 < range[i+1].start) | 956 | if (range[i].end < range[i+1].start) |
1107 | total_trim_size += real_trim_memory(range[i].end + 1, | 957 | total_trim_size += real_trim_memory(range[i].end, |
1108 | range[i+1].start); | 958 | range[i+1].start); |
1109 | } | 959 | } |
1110 | 960 | ||
1111 | /* Check the top: */ | 961 | /* Check the top: */ |
1112 | i = nr_range - 1; | 962 | i = nr_range - 1; |
1113 | if (range[i].end + 1 < end_pfn) | 963 | if (range[i].end < end_pfn) |
1114 | total_trim_size += real_trim_memory(range[i].end + 1, | 964 | total_trim_size += real_trim_memory(range[i].end, |
1115 | end_pfn); | 965 | end_pfn); |
1116 | 966 | ||
1117 | if (total_trim_size) { | 967 | if (total_trim_size) { |
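
The cleanup.c hunks above switch from the file-private, inclusive-end struct res_range to the shared, exclusive-end struct range from linux/range.h. That is why every "base + size - 1" becomes "base + size", sum_ranges() drops its "+ 1", and a hole between two ranges is now simply range[i].end < range[i+1].start. A self-contained sketch of the exclusive-end convention (the px_range type and helpers are illustrative names, not the kernel/range.c API):

	#include <stdio.h>

	struct px_range {
		unsigned long long start;
		unsigned long long end;		/* exclusive: first pfn NOT covered */
	};

	/* Size no longer needs the "+ 1" the old inclusive-end code used. */
	static unsigned long long range_size(const struct px_range *r)
	{
		return r->end - r->start;
	}

	/* A hole exists between a and b exactly when a ends before b starts. */
	static int hole_between(const struct px_range *a, const struct px_range *b)
	{
		return a->end < b->start;
	}

	int main(void)
	{
		struct px_range r[2] = {
			{ 0x000, 0x100 },	/* pfns 0x000 .. 0x0ff */
			{ 0x180, 0x200 },	/* pfns 0x180 .. 0x1ff */
		};

		printf("sizes: %llu and %llu pages, hole: %d\n",
		       range_size(&r[0]), range_size(&r[1]),
		       hole_between(&r[0], &r[1]));
		return 0;
	}
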
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 228d982ce09c..68a3343e5798 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c | |||
@@ -265,7 +265,7 @@ static void cyrix_set_all(void) | |||
265 | post_set(); | 265 | post_set(); |
266 | } | 266 | } |
267 | 267 | ||
268 | static struct mtrr_ops cyrix_mtrr_ops = { | 268 | static const struct mtrr_ops cyrix_mtrr_ops = { |
269 | .vendor = X86_VENDOR_CYRIX, | 269 | .vendor = X86_VENDOR_CYRIX, |
270 | .set_all = cyrix_set_all, | 270 | .set_all = cyrix_set_all, |
271 | .set = cyrix_set_arr, | 271 | .set = cyrix_set_arr, |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 55da0c5f68dd..7d28d7d03885 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -6,7 +6,6 @@ | |||
6 | 6 | ||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/slab.h> | ||
10 | #include <linux/io.h> | 9 | #include <linux/io.h> |
11 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
12 | 11 | ||
@@ -434,13 +433,12 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
434 | { | 433 | { |
435 | unsigned int mask_lo, mask_hi, base_lo, base_hi; | 434 | unsigned int mask_lo, mask_hi, base_lo, base_hi; |
436 | unsigned int tmp, hi; | 435 | unsigned int tmp, hi; |
437 | int cpu; | ||
438 | 436 | ||
439 | /* | 437 | /* |
440 | * get_mtrr doesn't need to update mtrr_state, also it could be called | 438 | * get_mtrr doesn't need to update mtrr_state, also it could be called |
441 | * from any cpu, so try to print it out directly. | 439 | * from any cpu, so try to print it out directly. |
442 | */ | 440 | */ |
443 | cpu = get_cpu(); | 441 | get_cpu(); |
444 | 442 | ||
445 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); | 443 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); |
446 | 444 | ||
@@ -464,7 +462,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
464 | tmp |= ~((1<<(hi - 1)) - 1); | 462 | tmp |= ~((1<<(hi - 1)) - 1); |
465 | 463 | ||
466 | if (tmp != mask_lo) { | 464 | if (tmp != mask_lo) { |
467 | WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); | 465 | printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); |
468 | mask_lo = tmp; | 466 | mask_lo = tmp; |
469 | } | 467 | } |
470 | } | 468 | } |
@@ -570,7 +568,7 @@ static unsigned long set_mtrr_state(void) | |||
570 | 568 | ||
571 | 569 | ||
572 | static unsigned long cr4; | 570 | static unsigned long cr4; |
573 | static DEFINE_SPINLOCK(set_atomicity_lock); | 571 | static DEFINE_RAW_SPINLOCK(set_atomicity_lock); |
574 | 572 | ||
575 | /* | 573 | /* |
576 | * Since we are disabling the cache don't allow any interrupts, | 574 | * Since we are disabling the cache don't allow any interrupts, |
@@ -590,7 +588,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
590 | * changes to the way the kernel boots | 588 | * changes to the way the kernel boots |
591 | */ | 589 | */ |
592 | 590 | ||
593 | spin_lock(&set_atomicity_lock); | 591 | raw_spin_lock(&set_atomicity_lock); |
594 | 592 | ||
595 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ | 593 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ |
596 | cr0 = read_cr0() | X86_CR0_CD; | 594 | cr0 = read_cr0() | X86_CR0_CD; |
@@ -627,7 +625,7 @@ static void post_set(void) __releases(set_atomicity_lock) | |||
627 | /* Restore value of CR4 */ | 625 | /* Restore value of CR4 */ |
628 | if (cpu_has_pge) | 626 | if (cpu_has_pge) |
629 | write_cr4(cr4); | 627 | write_cr4(cr4); |
630 | spin_unlock(&set_atomicity_lock); | 628 | raw_spin_unlock(&set_atomicity_lock); |
631 | } | 629 | } |
632 | 630 | ||
633 | static void generic_set_all(void) | 631 | static void generic_set_all(void) |
@@ -752,7 +750,7 @@ int positive_have_wrcomb(void) | |||
752 | /* | 750 | /* |
753 | * Generic structure... | 751 | * Generic structure... |
754 | */ | 752 | */ |
755 | struct mtrr_ops generic_mtrr_ops = { | 753 | const struct mtrr_ops generic_mtrr_ops = { |
756 | .use_intel_if = 1, | 754 | .use_intel_if = 1, |
757 | .set_all = generic_set_all, | 755 | .set_all = generic_set_all, |
758 | .get = generic_get_mtrr, | 756 | .get = generic_get_mtrr, |
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index e006e56f699c..79289632cb27 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/ctype.h> | 6 | #include <linux/ctype.h> |
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/slab.h> | ||
8 | #include <linux/init.h> | 9 | #include <linux/init.h> |
9 | 10 | ||
10 | #define LINE_SIZE 80 | 11 | #define LINE_SIZE 80 |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 84e83de54575..01c0f3ee6cc3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -35,6 +35,7 @@ | |||
35 | 35 | ||
36 | #include <linux/types.h> /* FIXME: kvm_para.h needs this */ | 36 | #include <linux/types.h> /* FIXME: kvm_para.h needs this */ |
37 | 37 | ||
38 | #include <linux/stop_machine.h> | ||
38 | #include <linux/kvm_para.h> | 39 | #include <linux/kvm_para.h> |
39 | #include <linux/uaccess.h> | 40 | #include <linux/uaccess.h> |
40 | #include <linux/module.h> | 41 | #include <linux/module.h> |
@@ -60,14 +61,14 @@ static DEFINE_MUTEX(mtrr_mutex); | |||
60 | u64 size_or_mask, size_and_mask; | 61 | u64 size_or_mask, size_and_mask; |
61 | static bool mtrr_aps_delayed_init; | 62 | static bool mtrr_aps_delayed_init; |
62 | 63 | ||
63 | static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; | 64 | static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; |
64 | 65 | ||
65 | struct mtrr_ops *mtrr_if; | 66 | const struct mtrr_ops *mtrr_if; |
66 | 67 | ||
67 | static void set_mtrr(unsigned int reg, unsigned long base, | 68 | static void set_mtrr(unsigned int reg, unsigned long base, |
68 | unsigned long size, mtrr_type type); | 69 | unsigned long size, mtrr_type type); |
69 | 70 | ||
70 | void set_mtrr_ops(struct mtrr_ops *ops) | 71 | void set_mtrr_ops(const struct mtrr_ops *ops) |
71 | { | 72 | { |
72 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) | 73 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) |
73 | mtrr_ops[ops->vendor] = ops; | 74 | mtrr_ops[ops->vendor] = ops; |
@@ -143,21 +144,28 @@ struct set_mtrr_data { | |||
143 | mtrr_type smp_type; | 144 | mtrr_type smp_type; |
144 | }; | 145 | }; |
145 | 146 | ||
147 | static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work); | ||
148 | |||
146 | /** | 149 | /** |
147 | * ipi_handler - Synchronisation handler. Executed by "other" CPUs. | 150 | * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs. |
151 | * @info: pointer to mtrr configuration data | ||
148 | * | 152 | * |
149 | * Returns nothing. | 153 | * Returns nothing. |
150 | */ | 154 | */ |
151 | static void ipi_handler(void *info) | 155 | static int mtrr_work_handler(void *info) |
152 | { | 156 | { |
153 | #ifdef CONFIG_SMP | 157 | #ifdef CONFIG_SMP |
154 | struct set_mtrr_data *data = info; | 158 | struct set_mtrr_data *data = info; |
155 | unsigned long flags; | 159 | unsigned long flags; |
156 | 160 | ||
161 | atomic_dec(&data->count); | ||
162 | while (!atomic_read(&data->gate)) | ||
163 | cpu_relax(); | ||
164 | |||
157 | local_irq_save(flags); | 165 | local_irq_save(flags); |
158 | 166 | ||
159 | atomic_dec(&data->count); | 167 | atomic_dec(&data->count); |
160 | while (!atomic_read(&data->gate)) | 168 | while (atomic_read(&data->gate)) |
161 | cpu_relax(); | 169 | cpu_relax(); |
162 | 170 | ||
163 | /* The master has cleared me to execute */ | 171 | /* The master has cleared me to execute */ |
@@ -172,12 +180,13 @@ static void ipi_handler(void *info) | |||
172 | } | 180 | } |
173 | 181 | ||
174 | atomic_dec(&data->count); | 182 | atomic_dec(&data->count); |
175 | while (atomic_read(&data->gate)) | 183 | while (!atomic_read(&data->gate)) |
176 | cpu_relax(); | 184 | cpu_relax(); |
177 | 185 | ||
178 | atomic_dec(&data->count); | 186 | atomic_dec(&data->count); |
179 | local_irq_restore(flags); | 187 | local_irq_restore(flags); |
180 | #endif | 188 | #endif |
189 | return 0; | ||
181 | } | 190 | } |
182 | 191 | ||
183 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) | 192 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) |
@@ -197,7 +206,7 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) | |||
197 | * | 206 | * |
198 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: | 207 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: |
199 | * | 208 | * |
200 | * 1. Send IPI to do the following: | 209 | * 1. Queue work to do the following on all processors: |
201 | * 2. Disable Interrupts | 210 | * 2. Disable Interrupts |
202 | * 3. Wait for all procs to do so | 211 | * 3. Wait for all procs to do so |
203 | * 4. Enter no-fill cache mode | 212 | * 4. Enter no-fill cache mode |
@@ -214,14 +223,17 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) | |||
214 | * 15. Enable interrupts. | 223 | * 15. Enable interrupts. |
215 | * | 224 | * |
216 | * What does that mean for us? Well, first we set data.count to the number | 225 | * What does that mean for us? Well, first we set data.count to the number |
217 | * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait | 226 | * of CPUs. As each CPU announces that it started the rendezvous handler by |
218 | * until it hits 0 and proceed. We set the data.gate flag and reset data.count. | 227 | * decrementing the count, we reset data.count and set the data.gate flag, |
219 | * Meanwhile, they are waiting for that flag to be set. Once it's set, each | 228 | * allowing all the CPUs to proceed with the work. As each CPU disables |
229 | * interrupts, it'll decrement data.count once. We wait until it hits 0 and | ||
230 | * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they | ||
231 | * are waiting for that flag to be cleared. Once it's cleared, each | ||
220 | * CPU goes through the transition of updating MTRRs. | 232 | * CPU goes through the transition of updating MTRRs. |
221 | * The CPU vendors may each do it differently, | 233 | * The CPU vendors may each do it differently, |
222 | * so we call mtrr_if->set() callback and let them take care of it. | 234 | * so we call mtrr_if->set() callback and let them take care of it. |
223 | * When they're done, they again decrement data->count and wait for data.gate | 235 | * When they're done, they again decrement data->count and wait for data.gate |
224 | * to be reset. | 236 | * to be set. |
225 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag | 237 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag |
226 | * Everyone then enables interrupts and we all continue on. | 238 | * Everyone then enables interrupts and we all continue on. |
227 | * | 239 | * |
@@ -233,6 +245,9 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
233 | { | 245 | { |
234 | struct set_mtrr_data data; | 246 | struct set_mtrr_data data; |
235 | unsigned long flags; | 247 | unsigned long flags; |
248 | int cpu; | ||
249 | |||
250 | preempt_disable(); | ||
236 | 251 | ||
237 | data.smp_reg = reg; | 252 | data.smp_reg = reg; |
238 | data.smp_base = base; | 253 | data.smp_base = base; |
@@ -245,10 +260,15 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
245 | atomic_set(&data.gate, 0); | 260 | atomic_set(&data.gate, 0); |
246 | 261 | ||
247 | /* Start the ball rolling on other CPUs */ | 262 | /* Start the ball rolling on other CPUs */ |
248 | if (smp_call_function(ipi_handler, &data, 0) != 0) | 263 | for_each_online_cpu(cpu) { |
249 | panic("mtrr: timed out waiting for other CPUs\n"); | 264 | struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu); |
265 | |||
266 | if (cpu == smp_processor_id()) | ||
267 | continue; | ||
268 | |||
269 | stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work); | ||
270 | } | ||
250 | 271 | ||
251 | local_irq_save(flags); | ||
252 | 272 | ||
253 | while (atomic_read(&data.count)) | 273 | while (atomic_read(&data.count)) |
254 | cpu_relax(); | 274 | cpu_relax(); |
@@ -258,6 +278,16 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
258 | smp_wmb(); | 278 | smp_wmb(); |
259 | atomic_set(&data.gate, 1); | 279 | atomic_set(&data.gate, 1); |
260 | 280 | ||
281 | local_irq_save(flags); | ||
282 | |||
283 | while (atomic_read(&data.count)) | ||
284 | cpu_relax(); | ||
285 | |||
286 | /* Ok, reset count and toggle gate */ | ||
287 | atomic_set(&data.count, num_booting_cpus() - 1); | ||
288 | smp_wmb(); | ||
289 | atomic_set(&data.gate, 0); | ||
290 | |||
261 | /* Do our MTRR business */ | 291 | /* Do our MTRR business */ |
262 | 292 | ||
263 | /* | 293 | /* |
@@ -278,7 +308,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
278 | 308 | ||
279 | atomic_set(&data.count, num_booting_cpus() - 1); | 309 | atomic_set(&data.count, num_booting_cpus() - 1); |
280 | smp_wmb(); | 310 | smp_wmb(); |
281 | atomic_set(&data.gate, 0); | 311 | atomic_set(&data.gate, 1); |
282 | 312 | ||
283 | /* | 313 | /* |
284 | * Wait here for everyone to have seen the gate change | 314 | * Wait here for everyone to have seen the gate change |
@@ -288,6 +318,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
288 | cpu_relax(); | 318 | cpu_relax(); |
289 | 319 | ||
290 | local_irq_restore(flags); | 320 | local_irq_restore(flags); |
321 | preempt_enable(); | ||
291 | } | 322 | } |
292 | 323 | ||
293 | /** | 324 | /** |
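
The count/gate handshake described in the set_mtrr() comment above is easier to follow outside the kernel. The sketch below is a user-space illustration only: C11 atomics and pthreads stand in for atomic_t, the stop_one_cpu_nowait() work items and cpu_relax(), and the "MTRR update" steps are placeholder comments. It walks the same four phases as mtrr_work_handler(): announce arrival and wait for the gate to be set, announce interrupts disabled and wait for it to be cleared, do the update, then finish once the gate is set again.

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	#define NWORKERS 3	/* stands in for num_booting_cpus() - 1 */

	static atomic_int count;
	static atomic_int gate;

	static void wait_count_zero(void)
	{
		while (atomic_load(&count))
			;	/* cpu_relax() in the kernel */
	}

	static void next_phase(int gate_val)
	{
		atomic_store(&count, NWORKERS);
		atomic_store(&gate, gate_val);
	}

	static void *worker(void *arg)
	{
		(void)arg;

		atomic_fetch_sub(&count, 1);	/* "I have started" */
		while (!atomic_load(&gate))
			;
		atomic_fetch_sub(&count, 1);	/* "my interrupts are off" */
		while (atomic_load(&gate))
			;
		/* ... per-CPU MTRR update would happen here ... */
		atomic_fetch_sub(&count, 1);	/* "my update is done" */
		while (!atomic_load(&gate))
			;
		atomic_fetch_sub(&count, 1);	/* "I saw the final toggle" */
		return NULL;
	}

	int main(void)
	{
		pthread_t tid[NWORKERS];
		int i;

		atomic_store(&count, NWORKERS);
		atomic_store(&gate, 0);
		for (i = 0; i < NWORKERS; i++)
			pthread_create(&tid[i], NULL, worker, NULL);

		wait_count_zero();	/* everyone arrived */
		next_phase(1);		/* open the gate: disable interrupts */
		wait_count_zero();	/* everyone has interrupts off */
		next_phase(0);		/* clear the gate: do the updates */
		/* ... the master's own MTRR update would happen here ... */
		wait_count_zero();	/* everyone finished updating */
		next_phase(1);		/* set the gate: re-enable interrupts */
		wait_count_zero();	/* everyone saw it */

		for (i = 0; i < NWORKERS; i++)
			pthread_join(tid[i], NULL);
		puts("rendezvous complete");
		return 0;
	}
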
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index a501dee9a87a..df5e41f31a27 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -32,7 +32,7 @@ extern int generic_get_free_region(unsigned long base, unsigned long size, | |||
32 | extern int generic_validate_add_page(unsigned long base, unsigned long size, | 32 | extern int generic_validate_add_page(unsigned long base, unsigned long size, |
33 | unsigned int type); | 33 | unsigned int type); |
34 | 34 | ||
35 | extern struct mtrr_ops generic_mtrr_ops; | 35 | extern const struct mtrr_ops generic_mtrr_ops; |
36 | 36 | ||
37 | extern int positive_have_wrcomb(void); | 37 | extern int positive_have_wrcomb(void); |
38 | 38 | ||
@@ -53,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index, | |||
53 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); | 53 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); |
54 | void get_mtrr_state(void); | 54 | void get_mtrr_state(void); |
55 | 55 | ||
56 | extern void set_mtrr_ops(struct mtrr_ops *ops); | 56 | extern void set_mtrr_ops(const struct mtrr_ops *ops); |
57 | 57 | ||
58 | extern u64 size_or_mask, size_and_mask; | 58 | extern u64 size_or_mask, size_and_mask; |
59 | extern struct mtrr_ops *mtrr_if; | 59 | extern const struct mtrr_ops *mtrr_if; |
60 | 60 | ||
61 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) | 61 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) |
62 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) | 62 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) |
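
With set_mtrr_ops() now taking a const pointer, each vendor's ops table can be declared const and live in read-only data, which is what the amd, centaur, cyrix and generic hunks above do. A hypothetical registration in that style (example_mtrr_ops and example_init_mtrr are illustrative names, not kernel symbols; the callbacks are the AMD ones visible at the top of this diff):

	static const struct mtrr_ops example_mtrr_ops = {
		.vendor	= X86_VENDOR_AMD,
		.set	= amd_set_mtrr,
		.get	= amd_get_mtrr,
	};

	static int __init example_init_mtrr(void)
	{
		/* Stores the const pointer in mtrr_ops[X86_VENDOR_AMD]. */
		set_mtrr_ops(&example_mtrr_ops);
		return 0;
	}
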
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c deleted file mode 100644 index dfc80b4e6b0d..000000000000 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ /dev/null | |||
@@ -1,94 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/io.h> | ||
3 | #include <linux/mm.h> | ||
4 | |||
5 | #include <asm/processor-cyrix.h> | ||
6 | #include <asm/processor-flags.h> | ||
7 | #include <asm/mtrr.h> | ||
8 | #include <asm/msr.h> | ||
9 | |||
10 | #include "mtrr.h" | ||
11 | |||
12 | /* Put the processor into a state where MTRRs can be safely set */ | ||
13 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) | ||
14 | { | ||
15 | unsigned int cr0; | ||
16 | |||
17 | /* Disable interrupts locally */ | ||
18 | local_irq_save(ctxt->flags); | ||
19 | |||
20 | if (use_intel() || is_cpu(CYRIX)) { | ||
21 | |||
22 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | ||
23 | if (cpu_has_pge) { | ||
24 | ctxt->cr4val = read_cr4(); | ||
25 | write_cr4(ctxt->cr4val & ~X86_CR4_PGE); | ||
26 | } | ||
27 | |||
28 | /* | ||
29 | * Disable and flush caches. Note that wbinvd flushes the TLBs | ||
30 | * as a side-effect | ||
31 | */ | ||
32 | cr0 = read_cr0() | X86_CR0_CD; | ||
33 | wbinvd(); | ||
34 | write_cr0(cr0); | ||
35 | wbinvd(); | ||
36 | |||
37 | if (use_intel()) { | ||
38 | /* Save MTRR state */ | ||
39 | rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); | ||
40 | } else { | ||
41 | /* | ||
42 | * Cyrix ARRs - | ||
43 | * everything else were excluded at the top | ||
44 | */ | ||
45 | ctxt->ccr3 = getCx86(CX86_CCR3); | ||
46 | } | ||
47 | } | ||
48 | } | ||
49 | |||
50 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) | ||
51 | { | ||
52 | if (use_intel()) { | ||
53 | /* Disable MTRRs, and set the default type to uncached */ | ||
54 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, | ||
55 | ctxt->deftype_hi); | ||
56 | } else { | ||
57 | if (is_cpu(CYRIX)) { | ||
58 | /* Cyrix ARRs - everything else were excluded at the top */ | ||
59 | setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); | ||
60 | } | ||
61 | } | ||
62 | } | ||
63 | |||
64 | /* Restore the processor after a set_mtrr_prepare */ | ||
65 | void set_mtrr_done(struct set_mtrr_context *ctxt) | ||
66 | { | ||
67 | if (use_intel() || is_cpu(CYRIX)) { | ||
68 | |||
69 | /* Flush caches and TLBs */ | ||
70 | wbinvd(); | ||
71 | |||
72 | /* Restore MTRRdefType */ | ||
73 | if (use_intel()) { | ||
74 | /* Intel (P6) standard MTRRs */ | ||
75 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, | ||
76 | ctxt->deftype_hi); | ||
77 | } else { | ||
78 | /* | ||
79 | * Cyrix ARRs - | ||
80 | * everything else was excluded at the top | ||
81 | */ | ||
82 | setCx86(CX86_CCR3, ctxt->ccr3); | ||
83 | } | ||
84 | |||
85 | /* Enable caches */ | ||
86 | write_cr0(read_cr0() & 0xbfffffff); | ||
87 | |||
88 | /* Restore value of CR4 */ | ||
89 | if (cpu_has_pge) | ||
90 | write_cr4(ctxt->cr4val); | ||
91 | } | ||
92 | /* Re-enable interrupts locally (if enabled previously) */ | ||
93 | local_irq_restore(ctxt->flags); | ||
94 | } | ||
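
For reference, the three helpers in the deleted state.c above were meant to bracket a vendor MTRR/ARR update: prepare (interrupts off, caches flushed and disabled, old state saved), disable the current MTRR setup, program the new values, then restore everything. A hypothetical caller, shown only to make that ordering explicit (nothing in this diff calls these helpers, which is presumably why the file can go away):

	static void example_reprogram_mtrrs(void)
	{
		struct set_mtrr_context ctxt;

		set_mtrr_prepare_save(&ctxt);	/* IRQs off, caches disabled, state saved */
		set_mtrr_cache_disable(&ctxt);	/* MTRRs/ARRs off, default type uncached */

		/* ... write the new MTRR/ARR values here ... */

		set_mtrr_done(&ctxt);		/* restore MTRR state, caches and IRQs */
	}
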
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8c1c07073ccc..03a5b0385ad6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -7,6 +7,7 @@ | |||
7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | 7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter |
8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> |
9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | 9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> |
10 | * Copyright (C) 2009 Google, Inc., Stephane Eranian | ||
10 | * | 11 | * |
11 | * For licencing details see kernel-base/COPYING | 12 | * For licencing details see kernel-base/COPYING |
12 | */ | 13 | */ |
@@ -20,215 +21,241 @@ | |||
20 | #include <linux/kdebug.h> | 21 | #include <linux/kdebug.h> |
21 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
22 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
24 | #include <linux/slab.h> | ||
23 | #include <linux/highmem.h> | 25 | #include <linux/highmem.h> |
24 | #include <linux/cpu.h> | 26 | #include <linux/cpu.h> |
27 | #include <linux/bitops.h> | ||
25 | 28 | ||
26 | #include <asm/apic.h> | 29 | #include <asm/apic.h> |
27 | #include <asm/stacktrace.h> | 30 | #include <asm/stacktrace.h> |
28 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
32 | #include <asm/compat.h> | ||
33 | |||
34 | #if 0 | ||
35 | #undef wrmsrl | ||
36 | #define wrmsrl(msr, val) \ | ||
37 | do { \ | ||
38 | trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ | ||
39 | (unsigned long)(val)); \ | ||
40 | native_write_msr((msr), (u32)((u64)(val)), \ | ||
41 | (u32)((u64)(val) >> 32)); \ | ||
42 | } while (0) | ||
43 | #endif | ||
29 | 44 | ||
30 | static u64 perf_event_mask __read_mostly; | 45 | /* |
46 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | ||
47 | */ | ||
48 | static unsigned long | ||
49 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
50 | { | ||
51 | unsigned long offset, addr = (unsigned long)from; | ||
52 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
53 | unsigned long size, len = 0; | ||
54 | struct page *page; | ||
55 | void *map; | ||
56 | int ret; | ||
31 | 57 | ||
32 | /* The maximal number of PEBS events: */ | 58 | do { |
33 | #define MAX_PEBS_EVENTS 4 | 59 | ret = __get_user_pages_fast(addr, 1, 0, &page); |
60 | if (!ret) | ||
61 | break; | ||
34 | 62 | ||
35 | /* The size of a BTS record in bytes: */ | 63 | offset = addr & (PAGE_SIZE - 1); |
36 | #define BTS_RECORD_SIZE 24 | 64 | size = min(PAGE_SIZE - offset, n - len); |
37 | 65 | ||
38 | /* The size of a per-cpu BTS buffer in bytes: */ | 66 | map = kmap_atomic(page, type); |
39 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) | 67 | memcpy(to, map+offset, size); |
68 | kunmap_atomic(map, type); | ||
69 | put_page(page); | ||
70 | |||
71 | len += size; | ||
72 | to += size; | ||
73 | addr += size; | ||
40 | 74 | ||
41 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | 75 | } while (len < n); |
42 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) | ||
43 | 76 | ||
77 | return len; | ||
78 | } | ||
44 | 79 | ||
45 | /* | 80 | struct event_constraint { |
46 | * Bits in the debugctlmsr controlling branch tracing. | 81 | union { |
47 | */ | 82 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
48 | #define X86_DEBUGCTL_TR (1 << 6) | 83 | u64 idxmsk64; |
49 | #define X86_DEBUGCTL_BTS (1 << 7) | 84 | }; |
50 | #define X86_DEBUGCTL_BTINT (1 << 8) | 85 | u64 code; |
51 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | 86 | u64 cmask; |
52 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | 87 | int weight; |
88 | }; | ||
53 | 89 | ||
54 | /* | 90 | struct amd_nb { |
55 | * A debug store configuration. | 91 | int nb_id; /* NorthBridge id */ |
56 | * | 92 | int refcnt; /* reference count */ |
57 | * We only support architectures that use 64bit fields. | 93 | struct perf_event *owners[X86_PMC_IDX_MAX]; |
58 | */ | 94 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; |
59 | struct debug_store { | ||
60 | u64 bts_buffer_base; | ||
61 | u64 bts_index; | ||
62 | u64 bts_absolute_maximum; | ||
63 | u64 bts_interrupt_threshold; | ||
64 | u64 pebs_buffer_base; | ||
65 | u64 pebs_index; | ||
66 | u64 pebs_absolute_maximum; | ||
67 | u64 pebs_interrupt_threshold; | ||
68 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
69 | }; | 95 | }; |
70 | 96 | ||
97 | #define MAX_LBR_ENTRIES 16 | ||
98 | |||
71 | struct cpu_hw_events { | 99 | struct cpu_hw_events { |
72 | struct perf_event *events[X86_PMC_IDX_MAX]; | 100 | /* |
73 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 101 | * Generic x86 PMC bits |
102 | */ | ||
103 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | ||
74 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 104 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
75 | unsigned long interrupts; | 105 | unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
76 | int enabled; | 106 | int enabled; |
107 | |||
108 | int n_events; | ||
109 | int n_added; | ||
110 | int n_txn; | ||
111 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | ||
112 | u64 tags[X86_PMC_IDX_MAX]; | ||
113 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | ||
114 | |||
115 | unsigned int group_flag; | ||
116 | |||
117 | /* | ||
118 | * Intel DebugStore bits | ||
119 | */ | ||
77 | struct debug_store *ds; | 120 | struct debug_store *ds; |
78 | }; | 121 | u64 pebs_enabled; |
79 | 122 | ||
80 | struct event_constraint { | 123 | /* |
81 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 124 | * Intel LBR bits |
82 | int code; | 125 | */ |
126 | int lbr_users; | ||
127 | void *lbr_context; | ||
128 | struct perf_branch_stack lbr_stack; | ||
129 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | ||
130 | |||
131 | /* | ||
132 | * AMD specific bits | ||
133 | */ | ||
134 | struct amd_nb *amd_nb; | ||
83 | }; | 135 | }; |
84 | 136 | ||
85 | #define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } | 137 | #define __EVENT_CONSTRAINT(c, n, m, w) {\ |
86 | #define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } | 138 | { .idxmsk64 = (n) }, \ |
139 | .code = (c), \ | ||
140 | .cmask = (m), \ | ||
141 | .weight = (w), \ | ||
142 | } | ||
87 | 143 | ||
88 | #define for_each_event_constraint(e, c) \ | 144 | #define EVENT_CONSTRAINT(c, n, m) \ |
89 | for ((e) = (c); (e)->idxmsk[0]; (e)++) | 145 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) |
90 | 146 | ||
147 | /* | ||
148 | * Constraint on the Event code. | ||
149 | */ | ||
150 | #define INTEL_EVENT_CONSTRAINT(c, n) \ | ||
151 | EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) | ||
152 | |||
153 | /* | ||
154 | * Constraint on the Event code + UMask + fixed-mask | ||
155 | * | ||
156 | * filter mask to validate fixed counter events. | ||
157 | * the following filters disqualify for fixed counters: | ||
158 | * - inv | ||
159 | * - edge | ||
160 | * - cnt-mask | ||
161 | * The other filters are supported by fixed counters. | ||
162 | * The any-thread option is supported starting with v3. | ||
163 | */ | ||
164 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | ||
165 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) | ||
166 | |||
167 | /* | ||
168 | * Constraint on the Event code + UMask | ||
169 | */ | ||
170 | #define PEBS_EVENT_CONSTRAINT(c, n) \ | ||
171 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) | ||
172 | |||
173 | #define EVENT_CONSTRAINT_END \ | ||
174 | EVENT_CONSTRAINT(0, 0, 0) | ||
175 | |||
176 | #define for_each_event_constraint(e, c) \ | ||
177 | for ((e) = (c); (e)->weight; (e)++) | ||
178 | |||
179 | union perf_capabilities { | ||
180 | struct { | ||
181 | u64 lbr_format : 6; | ||
182 | u64 pebs_trap : 1; | ||
183 | u64 pebs_arch_reg : 1; | ||
184 | u64 pebs_format : 4; | ||
185 | u64 smm_freeze : 1; | ||
186 | }; | ||
187 | u64 capabilities; | ||
188 | }; | ||
91 | 189 | ||
92 | /* | 190 | /* |
93 | * struct x86_pmu - generic x86 pmu | 191 | * struct x86_pmu - generic x86 pmu |
94 | */ | 192 | */ |
95 | struct x86_pmu { | 193 | struct x86_pmu { |
194 | /* | ||
195 | * Generic x86 PMC bits | ||
196 | */ | ||
96 | const char *name; | 197 | const char *name; |
97 | int version; | 198 | int version; |
98 | int (*handle_irq)(struct pt_regs *); | 199 | int (*handle_irq)(struct pt_regs *); |
99 | void (*disable_all)(void); | 200 | void (*disable_all)(void); |
100 | void (*enable_all)(void); | 201 | void (*enable_all)(int added); |
101 | void (*enable)(struct hw_perf_event *, int); | 202 | void (*enable)(struct perf_event *); |
102 | void (*disable)(struct hw_perf_event *, int); | 203 | void (*disable)(struct perf_event *); |
204 | int (*hw_config)(struct perf_event *event); | ||
205 | int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); | ||
103 | unsigned eventsel; | 206 | unsigned eventsel; |
104 | unsigned perfctr; | 207 | unsigned perfctr; |
105 | u64 (*event_map)(int); | 208 | u64 (*event_map)(int); |
106 | u64 (*raw_event)(u64); | ||
107 | int max_events; | 209 | int max_events; |
108 | int num_events; | 210 | int num_counters; |
109 | int num_events_fixed; | 211 | int num_counters_fixed; |
110 | int event_bits; | 212 | int cntval_bits; |
111 | u64 event_mask; | 213 | u64 cntval_mask; |
112 | int apic; | 214 | int apic; |
113 | u64 max_period; | 215 | u64 max_period; |
114 | u64 intel_ctrl; | 216 | struct event_constraint * |
115 | void (*enable_bts)(u64 config); | 217 | (*get_event_constraints)(struct cpu_hw_events *cpuc, |
116 | void (*disable_bts)(void); | 218 | struct perf_event *event); |
117 | int (*get_event_idx)(struct cpu_hw_events *cpuc, | ||
118 | struct hw_perf_event *hwc); | ||
119 | }; | ||
120 | 219 | ||
121 | static struct x86_pmu x86_pmu __read_mostly; | 220 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
221 | struct perf_event *event); | ||
222 | struct event_constraint *event_constraints; | ||
223 | void (*quirks)(void); | ||
224 | int perfctr_second_write; | ||
122 | 225 | ||
123 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | 226 | int (*cpu_prepare)(int cpu); |
124 | .enabled = 1, | 227 | void (*cpu_starting)(int cpu); |
125 | }; | 228 | void (*cpu_dying)(int cpu); |
126 | 229 | void (*cpu_dead)(int cpu); | |
127 | static const struct event_constraint *event_constraints; | ||
128 | |||
129 | /* | ||
130 | * Not sure about some of these | ||
131 | */ | ||
132 | static const u64 p6_perfmon_event_map[] = | ||
133 | { | ||
134 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
135 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
136 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
137 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
138 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
139 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
140 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
141 | }; | ||
142 | |||
143 | static u64 p6_pmu_event_map(int hw_event) | ||
144 | { | ||
145 | return p6_perfmon_event_map[hw_event]; | ||
146 | } | ||
147 | 230 | ||
148 | /* | 231 | /* |
149 | * Event setting that is specified not to count anything. | 232 | * Intel Arch Perfmon v2+ |
150 | * We use this to effectively disable a counter. | 233 | */ |
151 | * | 234 | u64 intel_ctrl; |
152 | * L2_RQSTS with 0 MESI unit mask. | 235 | union perf_capabilities intel_cap; |
153 | */ | ||
154 | #define P6_NOP_EVENT 0x0000002EULL | ||
155 | |||
156 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
157 | { | ||
158 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
159 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
160 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
161 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
162 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
163 | |||
164 | #define P6_EVNTSEL_MASK \ | ||
165 | (P6_EVNTSEL_EVENT_MASK | \ | ||
166 | P6_EVNTSEL_UNIT_MASK | \ | ||
167 | P6_EVNTSEL_EDGE_MASK | \ | ||
168 | P6_EVNTSEL_INV_MASK | \ | ||
169 | P6_EVNTSEL_REG_MASK) | ||
170 | |||
171 | return hw_event & P6_EVNTSEL_MASK; | ||
172 | } | ||
173 | 236 | ||
174 | static const struct event_constraint intel_p6_event_constraints[] = | 237 | /* |
175 | { | 238 | * Intel DebugStore bits |
176 | EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | 239 | */ |
177 | EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | 240 | int bts, pebs; |
178 | EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | 241 | int pebs_record_size; |
179 | EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | 242 | void (*drain_pebs)(struct pt_regs *regs); |
180 | EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | 243 | struct event_constraint *pebs_constraints; |
181 | EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
182 | EVENT_CONSTRAINT_END | ||
183 | }; | ||
184 | 244 | ||
185 | /* | 245 | /* |
186 | * Intel PerfMon v3. Used on Core2 and later. | 246 | * Intel LBR |
187 | */ | 247 | */ |
188 | static const u64 intel_perfmon_event_map[] = | 248 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ |
189 | { | 249 | int lbr_nr; /* hardware stack size */ |
190 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
191 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
192 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
193 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
194 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
195 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
196 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
197 | }; | 250 | }; |
198 | 251 | ||
199 | static const struct event_constraint intel_core_event_constraints[] = | 252 | static struct x86_pmu x86_pmu __read_mostly; |
200 | { | ||
201 | EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
202 | EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
203 | EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
204 | EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
205 | EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
206 | EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
207 | EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
208 | EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
209 | EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
210 | EVENT_CONSTRAINT_END | ||
211 | }; | ||
212 | 253 | ||
213 | static const struct event_constraint intel_nehalem_event_constraints[] = | 254 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
214 | { | 255 | .enabled = 1, |
215 | EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
216 | EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
217 | EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
218 | EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
219 | EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
220 | EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ | ||
221 | EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
222 | EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ | ||
223 | EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ | ||
224 | EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ | ||
225 | EVENT_CONSTRAINT_END | ||
226 | }; | 256 | }; |
227 | 257 | ||
228 | static u64 intel_pmu_event_map(int hw_event) | 258 | static int x86_perf_event_set_period(struct perf_event *event); |
229 | { | ||
230 | return intel_perfmon_event_map[hw_event]; | ||
231 | } | ||
232 | 259 | ||
233 | /* | 260 | /* |
234 | * Generalized hw caching related hw_event table, filled | 261 | * Generalized hw caching related hw_event table, filled |
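
The constraint macros added in the hunk above pair an event code with a bitmask of the counters it is allowed to use (idxmsk64) and a mask selecting which bits of the event code matter for the match (cmask); EVENT_CONSTRAINT_END works as a terminator because its weight is zero, which is exactly what for_each_event_constraint() tests. A hypothetical table in the new format, mirroring two of the removed Core constraints plus one fixed-counter entry (the table and function names are illustrative; FIXED_EVENT_CONSTRAINT's second argument is taken to be the fixed-counter index, as implied by the 1ULL << (32+n) shift):

	static struct event_constraint example_event_constraints[] =
	{
		INTEL_EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE: counter 0 only */
		INTEL_EVENT_CONSTRAINT(0x12, 0x2),	/* MUL: counter 1 only */
		FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY, fixed counter 0 */
		EVENT_CONSTRAINT_END
	};

	/* Iteration stops at the all-zero terminator (weight == 0). */
	static void example_dump_constraints(void)
	{
		struct event_constraint *c;

		for_each_event_constraint(c, example_event_constraints)
			pr_debug("event %02llx may use counter mask %016llx\n",
				 c->code, c->idxmsk64);
	}
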
@@ -245,435 +272,18 @@ static u64 __read_mostly hw_cache_event_ids | |||
245 | [PERF_COUNT_HW_CACHE_OP_MAX] | 272 | [PERF_COUNT_HW_CACHE_OP_MAX] |
246 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | 273 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
247 | 274 | ||
248 | static __initconst u64 nehalem_hw_cache_event_ids | ||
249 | [PERF_COUNT_HW_CACHE_MAX] | ||
250 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
251 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
252 | { | ||
253 | [ C(L1D) ] = { | ||
254 | [ C(OP_READ) ] = { | ||
255 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
256 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
257 | }, | ||
258 | [ C(OP_WRITE) ] = { | ||
259 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
260 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
261 | }, | ||
262 | [ C(OP_PREFETCH) ] = { | ||
263 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
264 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
265 | }, | ||
266 | }, | ||
267 | [ C(L1I ) ] = { | ||
268 | [ C(OP_READ) ] = { | ||
269 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
270 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
271 | }, | ||
272 | [ C(OP_WRITE) ] = { | ||
273 | [ C(RESULT_ACCESS) ] = -1, | ||
274 | [ C(RESULT_MISS) ] = -1, | ||
275 | }, | ||
276 | [ C(OP_PREFETCH) ] = { | ||
277 | [ C(RESULT_ACCESS) ] = 0x0, | ||
278 | [ C(RESULT_MISS) ] = 0x0, | ||
279 | }, | ||
280 | }, | ||
281 | [ C(LL ) ] = { | ||
282 | [ C(OP_READ) ] = { | ||
283 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
284 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
285 | }, | ||
286 | [ C(OP_WRITE) ] = { | ||
287 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
288 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
289 | }, | ||
290 | [ C(OP_PREFETCH) ] = { | ||
291 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
292 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
293 | }, | ||
294 | }, | ||
295 | [ C(DTLB) ] = { | ||
296 | [ C(OP_READ) ] = { | ||
297 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
298 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
299 | }, | ||
300 | [ C(OP_WRITE) ] = { | ||
301 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
302 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
303 | }, | ||
304 | [ C(OP_PREFETCH) ] = { | ||
305 | [ C(RESULT_ACCESS) ] = 0x0, | ||
306 | [ C(RESULT_MISS) ] = 0x0, | ||
307 | }, | ||
308 | }, | ||
309 | [ C(ITLB) ] = { | ||
310 | [ C(OP_READ) ] = { | ||
311 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
312 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
313 | }, | ||
314 | [ C(OP_WRITE) ] = { | ||
315 | [ C(RESULT_ACCESS) ] = -1, | ||
316 | [ C(RESULT_MISS) ] = -1, | ||
317 | }, | ||
318 | [ C(OP_PREFETCH) ] = { | ||
319 | [ C(RESULT_ACCESS) ] = -1, | ||
320 | [ C(RESULT_MISS) ] = -1, | ||
321 | }, | ||
322 | }, | ||
323 | [ C(BPU ) ] = { | ||
324 | [ C(OP_READ) ] = { | ||
325 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
326 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
327 | }, | ||
328 | [ C(OP_WRITE) ] = { | ||
329 | [ C(RESULT_ACCESS) ] = -1, | ||
330 | [ C(RESULT_MISS) ] = -1, | ||
331 | }, | ||
332 | [ C(OP_PREFETCH) ] = { | ||
333 | [ C(RESULT_ACCESS) ] = -1, | ||
334 | [ C(RESULT_MISS) ] = -1, | ||
335 | }, | ||
336 | }, | ||
337 | }; | ||
338 | |||
339 | static __initconst u64 core2_hw_cache_event_ids | ||
340 | [PERF_COUNT_HW_CACHE_MAX] | ||
341 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
342 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
343 | { | ||
344 | [ C(L1D) ] = { | ||
345 | [ C(OP_READ) ] = { | ||
346 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
347 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
348 | }, | ||
349 | [ C(OP_WRITE) ] = { | ||
350 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
351 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
352 | }, | ||
353 | [ C(OP_PREFETCH) ] = { | ||
354 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
355 | [ C(RESULT_MISS) ] = 0, | ||
356 | }, | ||
357 | }, | ||
358 | [ C(L1I ) ] = { | ||
359 | [ C(OP_READ) ] = { | ||
360 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
361 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
362 | }, | ||
363 | [ C(OP_WRITE) ] = { | ||
364 | [ C(RESULT_ACCESS) ] = -1, | ||
365 | [ C(RESULT_MISS) ] = -1, | ||
366 | }, | ||
367 | [ C(OP_PREFETCH) ] = { | ||
368 | [ C(RESULT_ACCESS) ] = 0, | ||
369 | [ C(RESULT_MISS) ] = 0, | ||
370 | }, | ||
371 | }, | ||
372 | [ C(LL ) ] = { | ||
373 | [ C(OP_READ) ] = { | ||
374 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
375 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
376 | }, | ||
377 | [ C(OP_WRITE) ] = { | ||
378 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
379 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
380 | }, | ||
381 | [ C(OP_PREFETCH) ] = { | ||
382 | [ C(RESULT_ACCESS) ] = 0, | ||
383 | [ C(RESULT_MISS) ] = 0, | ||
384 | }, | ||
385 | }, | ||
386 | [ C(DTLB) ] = { | ||
387 | [ C(OP_READ) ] = { | ||
388 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
389 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
390 | }, | ||
391 | [ C(OP_WRITE) ] = { | ||
392 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
393 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
394 | }, | ||
395 | [ C(OP_PREFETCH) ] = { | ||
396 | [ C(RESULT_ACCESS) ] = 0, | ||
397 | [ C(RESULT_MISS) ] = 0, | ||
398 | }, | ||
399 | }, | ||
400 | [ C(ITLB) ] = { | ||
401 | [ C(OP_READ) ] = { | ||
402 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
403 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
404 | }, | ||
405 | [ C(OP_WRITE) ] = { | ||
406 | [ C(RESULT_ACCESS) ] = -1, | ||
407 | [ C(RESULT_MISS) ] = -1, | ||
408 | }, | ||
409 | [ C(OP_PREFETCH) ] = { | ||
410 | [ C(RESULT_ACCESS) ] = -1, | ||
411 | [ C(RESULT_MISS) ] = -1, | ||
412 | }, | ||
413 | }, | ||
414 | [ C(BPU ) ] = { | ||
415 | [ C(OP_READ) ] = { | ||
416 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
417 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
418 | }, | ||
419 | [ C(OP_WRITE) ] = { | ||
420 | [ C(RESULT_ACCESS) ] = -1, | ||
421 | [ C(RESULT_MISS) ] = -1, | ||
422 | }, | ||
423 | [ C(OP_PREFETCH) ] = { | ||
424 | [ C(RESULT_ACCESS) ] = -1, | ||
425 | [ C(RESULT_MISS) ] = -1, | ||
426 | }, | ||
427 | }, | ||
428 | }; | ||
429 | |||
430 | static __initconst u64 atom_hw_cache_event_ids | ||
431 | [PERF_COUNT_HW_CACHE_MAX] | ||
432 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
433 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
434 | { | ||
435 | [ C(L1D) ] = { | ||
436 | [ C(OP_READ) ] = { | ||
437 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
438 | [ C(RESULT_MISS) ] = 0, | ||
439 | }, | ||
440 | [ C(OP_WRITE) ] = { | ||
441 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
442 | [ C(RESULT_MISS) ] = 0, | ||
443 | }, | ||
444 | [ C(OP_PREFETCH) ] = { | ||
445 | [ C(RESULT_ACCESS) ] = 0x0, | ||
446 | [ C(RESULT_MISS) ] = 0, | ||
447 | }, | ||
448 | }, | ||
449 | [ C(L1I ) ] = { | ||
450 | [ C(OP_READ) ] = { | ||
451 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
452 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
453 | }, | ||
454 | [ C(OP_WRITE) ] = { | ||
455 | [ C(RESULT_ACCESS) ] = -1, | ||
456 | [ C(RESULT_MISS) ] = -1, | ||
457 | }, | ||
458 | [ C(OP_PREFETCH) ] = { | ||
459 | [ C(RESULT_ACCESS) ] = 0, | ||
460 | [ C(RESULT_MISS) ] = 0, | ||
461 | }, | ||
462 | }, | ||
463 | [ C(LL ) ] = { | ||
464 | [ C(OP_READ) ] = { | ||
465 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
466 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
467 | }, | ||
468 | [ C(OP_WRITE) ] = { | ||
469 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
470 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
471 | }, | ||
472 | [ C(OP_PREFETCH) ] = { | ||
473 | [ C(RESULT_ACCESS) ] = 0, | ||
474 | [ C(RESULT_MISS) ] = 0, | ||
475 | }, | ||
476 | }, | ||
477 | [ C(DTLB) ] = { | ||
478 | [ C(OP_READ) ] = { | ||
479 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
480 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
481 | }, | ||
482 | [ C(OP_WRITE) ] = { | ||
483 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
484 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
485 | }, | ||
486 | [ C(OP_PREFETCH) ] = { | ||
487 | [ C(RESULT_ACCESS) ] = 0, | ||
488 | [ C(RESULT_MISS) ] = 0, | ||
489 | }, | ||
490 | }, | ||
491 | [ C(ITLB) ] = { | ||
492 | [ C(OP_READ) ] = { | ||
493 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
494 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
495 | }, | ||
496 | [ C(OP_WRITE) ] = { | ||
497 | [ C(RESULT_ACCESS) ] = -1, | ||
498 | [ C(RESULT_MISS) ] = -1, | ||
499 | }, | ||
500 | [ C(OP_PREFETCH) ] = { | ||
501 | [ C(RESULT_ACCESS) ] = -1, | ||
502 | [ C(RESULT_MISS) ] = -1, | ||
503 | }, | ||
504 | }, | ||
505 | [ C(BPU ) ] = { | ||
506 | [ C(OP_READ) ] = { | ||
507 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
508 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
509 | }, | ||
510 | [ C(OP_WRITE) ] = { | ||
511 | [ C(RESULT_ACCESS) ] = -1, | ||
512 | [ C(RESULT_MISS) ] = -1, | ||
513 | }, | ||
514 | [ C(OP_PREFETCH) ] = { | ||
515 | [ C(RESULT_ACCESS) ] = -1, | ||
516 | [ C(RESULT_MISS) ] = -1, | ||
517 | }, | ||
518 | }, | ||
519 | }; | ||
520 | |||
521 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
522 | { | ||
523 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
524 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
525 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
526 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
527 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
528 | |||
529 | #define CORE_EVNTSEL_MASK \ | ||
530 | (CORE_EVNTSEL_EVENT_MASK | \ | ||
531 | CORE_EVNTSEL_UNIT_MASK | \ | ||
532 | CORE_EVNTSEL_EDGE_MASK | \ | ||
533 | CORE_EVNTSEL_INV_MASK | \ | ||
534 | CORE_EVNTSEL_REG_MASK) | ||
535 | |||
536 | return hw_event & CORE_EVNTSEL_MASK; | ||
537 | } | ||
538 | |||
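The CORE_EVNTSEL_* masks above whitelist only the event-select, unit-mask, edge, invert and counter-mask fields of a user-supplied raw config; everything else (enable, interrupt, OS/USR bits) stays under kernel control. A minimal user-space sketch of the same filtering, with an invented sample config, looks like this (not kernel code, values are illustrative only):

#include <stdio.h>
#include <stdint.h>

/* mask values copied from the hunk above */
#define CORE_EVNTSEL_EVENT_MASK  0x000000FFULL
#define CORE_EVNTSEL_UNIT_MASK   0x0000FF00ULL
#define CORE_EVNTSEL_EDGE_MASK   0x00040000ULL
#define CORE_EVNTSEL_INV_MASK    0x00800000ULL
#define CORE_EVNTSEL_REG_MASK    0xFF000000ULL

#define CORE_EVNTSEL_MASK \
	(CORE_EVNTSEL_EVENT_MASK | CORE_EVNTSEL_UNIT_MASK | \
	 CORE_EVNTSEL_EDGE_MASK  | CORE_EVNTSEL_INV_MASK  | \
	 CORE_EVNTSEL_REG_MASK)

int main(void)
{
	/* hypothetical raw config: event 0xc4, umask 0x01, plus
	 * USR/OS/EDGE/ENABLE bits set by the caller */
	uint64_t raw = 0x004701c4ULL;

	/* only event, umask, edge, inv and cmask survive the filter */
	printf("raw:      0x%016llx\n", (unsigned long long)raw);
	printf("filtered: 0x%016llx\n",
	       (unsigned long long)(raw & CORE_EVNTSEL_MASK));
	return 0;
}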
539 | static __initconst u64 amd_hw_cache_event_ids | ||
540 | [PERF_COUNT_HW_CACHE_MAX] | ||
541 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
542 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
543 | { | ||
544 | [ C(L1D) ] = { | ||
545 | [ C(OP_READ) ] = { | ||
546 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
547 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | ||
548 | }, | ||
549 | [ C(OP_WRITE) ] = { | ||
550 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ | ||
551 | [ C(RESULT_MISS) ] = 0, | ||
552 | }, | ||
553 | [ C(OP_PREFETCH) ] = { | ||
554 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | ||
555 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | ||
556 | }, | ||
557 | }, | ||
558 | [ C(L1I ) ] = { | ||
559 | [ C(OP_READ) ] = { | ||
560 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
561 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
562 | }, | ||
563 | [ C(OP_WRITE) ] = { | ||
564 | [ C(RESULT_ACCESS) ] = -1, | ||
565 | [ C(RESULT_MISS) ] = -1, | ||
566 | }, | ||
567 | [ C(OP_PREFETCH) ] = { | ||
568 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | ||
569 | [ C(RESULT_MISS) ] = 0, | ||
570 | }, | ||
571 | }, | ||
572 | [ C(LL ) ] = { | ||
573 | [ C(OP_READ) ] = { | ||
574 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | ||
575 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | ||
576 | }, | ||
577 | [ C(OP_WRITE) ] = { | ||
578 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | ||
579 | [ C(RESULT_MISS) ] = 0, | ||
580 | }, | ||
581 | [ C(OP_PREFETCH) ] = { | ||
582 | [ C(RESULT_ACCESS) ] = 0, | ||
583 | [ C(RESULT_MISS) ] = 0, | ||
584 | }, | ||
585 | }, | ||
586 | [ C(DTLB) ] = { | ||
587 | [ C(OP_READ) ] = { | ||
588 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
589 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */ | ||
590 | }, | ||
591 | [ C(OP_WRITE) ] = { | ||
592 | [ C(RESULT_ACCESS) ] = 0, | ||
593 | [ C(RESULT_MISS) ] = 0, | ||
594 | }, | ||
595 | [ C(OP_PREFETCH) ] = { | ||
596 | [ C(RESULT_ACCESS) ] = 0, | ||
597 | [ C(RESULT_MISS) ] = 0, | ||
598 | }, | ||
599 | }, | ||
600 | [ C(ITLB) ] = { | ||
601 | [ C(OP_READ) ] = { | ||
602 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ | ||
603 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
604 | }, | ||
605 | [ C(OP_WRITE) ] = { | ||
606 | [ C(RESULT_ACCESS) ] = -1, | ||
607 | [ C(RESULT_MISS) ] = -1, | ||
608 | }, | ||
609 | [ C(OP_PREFETCH) ] = { | ||
610 | [ C(RESULT_ACCESS) ] = -1, | ||
611 | [ C(RESULT_MISS) ] = -1, | ||
612 | }, | ||
613 | }, | ||
614 | [ C(BPU ) ] = { | ||
615 | [ C(OP_READ) ] = { | ||
616 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
617 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
618 | }, | ||
619 | [ C(OP_WRITE) ] = { | ||
620 | [ C(RESULT_ACCESS) ] = -1, | ||
621 | [ C(RESULT_MISS) ] = -1, | ||
622 | }, | ||
623 | [ C(OP_PREFETCH) ] = { | ||
624 | [ C(RESULT_ACCESS) ] = -1, | ||
625 | [ C(RESULT_MISS) ] = -1, | ||
626 | }, | ||
627 | }, | ||
628 | }; | ||
629 | |||
630 | /* | ||
631 | * AMD Performance Monitor K7 and later. | ||
632 | */ | ||
633 | static const u64 amd_perfmon_event_map[] = | ||
634 | { | ||
635 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
636 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
637 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
638 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
639 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
640 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
641 | }; | ||
642 | |||
643 | static u64 amd_pmu_event_map(int hw_event) | ||
644 | { | ||
645 | return amd_perfmon_event_map[hw_event]; | ||
646 | } | ||
647 | |||
648 | static u64 amd_pmu_raw_event(u64 hw_event) | ||
649 | { | ||
650 | #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL | ||
651 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
652 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | ||
653 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | ||
654 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | ||
655 | |||
656 | #define K7_EVNTSEL_MASK \ | ||
657 | (K7_EVNTSEL_EVENT_MASK | \ | ||
658 | K7_EVNTSEL_UNIT_MASK | \ | ||
659 | K7_EVNTSEL_EDGE_MASK | \ | ||
660 | K7_EVNTSEL_INV_MASK | \ | ||
661 | K7_EVNTSEL_REG_MASK) | ||
662 | |||
663 | return hw_event & K7_EVNTSEL_MASK; | ||
664 | } | ||
665 | |||
666 | /* | 275 | /* |
667 | * Propagate event elapsed time into the generic event. | 276 | * Propagate event elapsed time into the generic event. |
668 | * Can only be executed on the CPU where the event is active. | 277 | * Can only be executed on the CPU where the event is active. |
669 | * Returns the delta events processed. | 278 | * Returns the delta events processed. |
670 | */ | 279 | */ |
671 | static u64 | 280 | static u64 |
672 | x86_perf_event_update(struct perf_event *event, | 281 | x86_perf_event_update(struct perf_event *event) |
673 | struct hw_perf_event *hwc, int idx) | ||
674 | { | 282 | { |
675 | int shift = 64 - x86_pmu.event_bits; | 283 | struct hw_perf_event *hwc = &event->hw; |
284 | int shift = 64 - x86_pmu.cntval_bits; | ||
676 | u64 prev_raw_count, new_raw_count; | 285 | u64 prev_raw_count, new_raw_count; |
286 | int idx = hwc->idx; | ||
677 | s64 delta; | 287 | s64 delta; |
678 | 288 | ||
679 | if (idx == X86_PMC_IDX_FIXED_BTS) | 289 | if (idx == X86_PMC_IDX_FIXED_BTS) |
@@ -687,10 +297,10 @@ x86_perf_event_update(struct perf_event *event, | |||
687 | * count to the generic event atomically: | 297 | * count to the generic event atomically: |
688 | */ | 298 | */ |
689 | again: | 299 | again: |
690 | prev_raw_count = atomic64_read(&hwc->prev_count); | 300 | prev_raw_count = local64_read(&hwc->prev_count); |
691 | rdmsrl(hwc->event_base + idx, new_raw_count); | 301 | rdmsrl(hwc->event_base + idx, new_raw_count); |
692 | 302 | ||
693 | if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, | 303 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, |
694 | new_raw_count) != prev_raw_count) | 304 | new_raw_count) != prev_raw_count) |
695 | goto again; | 305 | goto again; |
696 | 306 | ||
@@ -705,8 +315,8 @@ again: | |||
705 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | 315 | delta = (new_raw_count << shift) - (prev_raw_count << shift); |
706 | delta >>= shift; | 316 | delta >>= shift; |
707 | 317 | ||
708 | atomic64_add(delta, &event->count); | 318 | local64_add(delta, &event->count); |
709 | atomic64_sub(delta, &hwc->period_left); | 319 | local64_sub(delta, &hwc->period_left); |
710 | 320 | ||
711 | return new_raw_count; | 321 | return new_raw_count; |
712 | } | 322 | } |
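The update path above sign-extends both raw counter reads by shifting them up by (64 - counter width) before taking the difference, so a counter that wrapped between two reads still yields a correct positive delta. A stand-alone sketch of that arithmetic, assuming a hypothetical 48-bit counter width, is:

#include <stdio.h>
#include <stdint.h>

/* same shift trick as x86_perf_event_update(): shift both raw values up
 * by (64 - width), subtract, then shift the signed result back down */
static int64_t counter_delta(uint64_t prev, uint64_t now, int width)
{
	int shift = 64 - width;
	int64_t delta = (int64_t)(now << shift) - (int64_t)(prev << shift);

	return delta >> shift;
}

int main(void)
{
	/* counter wrapped from near the 48-bit limit back to a small value */
	uint64_t prev = 0xFFFFFFFFF000ULL;
	uint64_t now  = 0x000000000800ULL;

	/* prints 0x1800 worth of events, despite the wrap */
	printf("delta = %lld\n", (long long)counter_delta(prev, now, 48));
	return 0;
}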
@@ -714,33 +324,32 @@ again: | |||
714 | static atomic_t active_events; | 324 | static atomic_t active_events; |
715 | static DEFINE_MUTEX(pmc_reserve_mutex); | 325 | static DEFINE_MUTEX(pmc_reserve_mutex); |
716 | 326 | ||
327 | #ifdef CONFIG_X86_LOCAL_APIC | ||
328 | |||
717 | static bool reserve_pmc_hardware(void) | 329 | static bool reserve_pmc_hardware(void) |
718 | { | 330 | { |
719 | #ifdef CONFIG_X86_LOCAL_APIC | ||
720 | int i; | 331 | int i; |
721 | 332 | ||
722 | if (nmi_watchdog == NMI_LOCAL_APIC) | 333 | if (nmi_watchdog == NMI_LOCAL_APIC) |
723 | disable_lapic_nmi_watchdog(); | 334 | disable_lapic_nmi_watchdog(); |
724 | 335 | ||
725 | for (i = 0; i < x86_pmu.num_events; i++) { | 336 | for (i = 0; i < x86_pmu.num_counters; i++) { |
726 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | 337 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) |
727 | goto perfctr_fail; | 338 | goto perfctr_fail; |
728 | } | 339 | } |
729 | 340 | ||
730 | for (i = 0; i < x86_pmu.num_events; i++) { | 341 | for (i = 0; i < x86_pmu.num_counters; i++) { |
731 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | 342 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) |
732 | goto eventsel_fail; | 343 | goto eventsel_fail; |
733 | } | 344 | } |
734 | #endif | ||
735 | 345 | ||
736 | return true; | 346 | return true; |
737 | 347 | ||
738 | #ifdef CONFIG_X86_LOCAL_APIC | ||
739 | eventsel_fail: | 348 | eventsel_fail: |
740 | for (i--; i >= 0; i--) | 349 | for (i--; i >= 0; i--) |
741 | release_evntsel_nmi(x86_pmu.eventsel + i); | 350 | release_evntsel_nmi(x86_pmu.eventsel + i); |
742 | 351 | ||
743 | i = x86_pmu.num_events; | 352 | i = x86_pmu.num_counters; |
744 | 353 | ||
745 | perfctr_fail: | 354 | perfctr_fail: |
746 | for (i--; i >= 0; i--) | 355 | for (i--; i >= 0; i--) |
@@ -750,128 +359,36 @@ perfctr_fail: | |||
750 | enable_lapic_nmi_watchdog(); | 359 | enable_lapic_nmi_watchdog(); |
751 | 360 | ||
752 | return false; | 361 | return false; |
753 | #endif | ||
754 | } | 362 | } |
755 | 363 | ||
756 | static void release_pmc_hardware(void) | 364 | static void release_pmc_hardware(void) |
757 | { | 365 | { |
758 | #ifdef CONFIG_X86_LOCAL_APIC | ||
759 | int i; | 366 | int i; |
760 | 367 | ||
761 | for (i = 0; i < x86_pmu.num_events; i++) { | 368 | for (i = 0; i < x86_pmu.num_counters; i++) { |
762 | release_perfctr_nmi(x86_pmu.perfctr + i); | 369 | release_perfctr_nmi(x86_pmu.perfctr + i); |
763 | release_evntsel_nmi(x86_pmu.eventsel + i); | 370 | release_evntsel_nmi(x86_pmu.eventsel + i); |
764 | } | 371 | } |
765 | 372 | ||
766 | if (nmi_watchdog == NMI_LOCAL_APIC) | 373 | if (nmi_watchdog == NMI_LOCAL_APIC) |
767 | enable_lapic_nmi_watchdog(); | 374 | enable_lapic_nmi_watchdog(); |
768 | #endif | ||
769 | } | ||
770 | |||
771 | static inline bool bts_available(void) | ||
772 | { | ||
773 | return x86_pmu.enable_bts != NULL; | ||
774 | } | ||
775 | |||
776 | static inline void init_debug_store_on_cpu(int cpu) | ||
777 | { | ||
778 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
779 | |||
780 | if (!ds) | ||
781 | return; | ||
782 | |||
783 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
784 | (u32)((u64)(unsigned long)ds), | ||
785 | (u32)((u64)(unsigned long)ds >> 32)); | ||
786 | } | ||
787 | |||
788 | static inline void fini_debug_store_on_cpu(int cpu) | ||
789 | { | ||
790 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
791 | return; | ||
792 | |||
793 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
794 | } | 375 | } |
795 | 376 | ||
796 | static void release_bts_hardware(void) | 377 | #else |
797 | { | ||
798 | int cpu; | ||
799 | |||
800 | if (!bts_available()) | ||
801 | return; | ||
802 | |||
803 | get_online_cpus(); | ||
804 | |||
805 | for_each_online_cpu(cpu) | ||
806 | fini_debug_store_on_cpu(cpu); | ||
807 | 378 | ||
808 | for_each_possible_cpu(cpu) { | 379 | static bool reserve_pmc_hardware(void) { return true; } |
809 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | 380 | static void release_pmc_hardware(void) {} |
810 | |||
811 | if (!ds) | ||
812 | continue; | ||
813 | |||
814 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
815 | |||
816 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
817 | kfree(ds); | ||
818 | } | ||
819 | |||
820 | put_online_cpus(); | ||
821 | } | ||
822 | |||
823 | static int reserve_bts_hardware(void) | ||
824 | { | ||
825 | int cpu, err = 0; | ||
826 | |||
827 | if (!bts_available()) | ||
828 | return 0; | ||
829 | |||
830 | get_online_cpus(); | ||
831 | |||
832 | for_each_possible_cpu(cpu) { | ||
833 | struct debug_store *ds; | ||
834 | void *buffer; | ||
835 | |||
836 | err = -ENOMEM; | ||
837 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
838 | if (unlikely(!buffer)) | ||
839 | break; | ||
840 | |||
841 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
842 | if (unlikely(!ds)) { | ||
843 | kfree(buffer); | ||
844 | break; | ||
845 | } | ||
846 | 381 | ||
847 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | 382 | #endif |
848 | ds->bts_index = ds->bts_buffer_base; | ||
849 | ds->bts_absolute_maximum = | ||
850 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
851 | ds->bts_interrupt_threshold = | ||
852 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
853 | |||
854 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
855 | err = 0; | ||
856 | } | ||
857 | |||
858 | if (err) | ||
859 | release_bts_hardware(); | ||
860 | else { | ||
861 | for_each_online_cpu(cpu) | ||
862 | init_debug_store_on_cpu(cpu); | ||
863 | } | ||
864 | 383 | ||
865 | put_online_cpus(); | 384 | static int reserve_ds_buffers(void); |
866 | 385 | static void release_ds_buffers(void); | |
867 | return err; | ||
868 | } | ||
869 | 386 | ||
870 | static void hw_perf_event_destroy(struct perf_event *event) | 387 | static void hw_perf_event_destroy(struct perf_event *event) |
871 | { | 388 | { |
872 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { | 389 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { |
873 | release_pmc_hardware(); | 390 | release_pmc_hardware(); |
874 | release_bts_hardware(); | 391 | release_ds_buffers(); |
875 | mutex_unlock(&pmc_reserve_mutex); | 392 | mutex_unlock(&pmc_reserve_mutex); |
876 | } | 393 | } |
877 | } | 394 | } |
@@ -914,93 +431,16 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
914 | return 0; | 431 | return 0; |
915 | } | 432 | } |
916 | 433 | ||
917 | static void intel_pmu_enable_bts(u64 config) | 434 | static int x86_setup_perfctr(struct perf_event *event) |
918 | { | ||
919 | unsigned long debugctlmsr; | ||
920 | |||
921 | debugctlmsr = get_debugctlmsr(); | ||
922 | |||
923 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
924 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
925 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
926 | |||
927 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
928 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
929 | |||
930 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
931 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
932 | |||
933 | update_debugctlmsr(debugctlmsr); | ||
934 | } | ||
935 | |||
936 | static void intel_pmu_disable_bts(void) | ||
937 | { | ||
938 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
939 | unsigned long debugctlmsr; | ||
940 | |||
941 | if (!cpuc->ds) | ||
942 | return; | ||
943 | |||
944 | debugctlmsr = get_debugctlmsr(); | ||
945 | |||
946 | debugctlmsr &= | ||
947 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
948 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
949 | |||
950 | update_debugctlmsr(debugctlmsr); | ||
951 | } | ||
952 | |||
953 | /* | ||
954 | * Setup the hardware configuration for a given attr_type | ||
955 | */ | ||
956 | static int __hw_perf_event_init(struct perf_event *event) | ||
957 | { | 435 | { |
958 | struct perf_event_attr *attr = &event->attr; | 436 | struct perf_event_attr *attr = &event->attr; |
959 | struct hw_perf_event *hwc = &event->hw; | 437 | struct hw_perf_event *hwc = &event->hw; |
960 | u64 config; | 438 | u64 config; |
961 | int err; | ||
962 | |||
963 | if (!x86_pmu_initialized()) | ||
964 | return -ENODEV; | ||
965 | |||
966 | err = 0; | ||
967 | if (!atomic_inc_not_zero(&active_events)) { | ||
968 | mutex_lock(&pmc_reserve_mutex); | ||
969 | if (atomic_read(&active_events) == 0) { | ||
970 | if (!reserve_pmc_hardware()) | ||
971 | err = -EBUSY; | ||
972 | else | ||
973 | err = reserve_bts_hardware(); | ||
974 | } | ||
975 | if (!err) | ||
976 | atomic_inc(&active_events); | ||
977 | mutex_unlock(&pmc_reserve_mutex); | ||
978 | } | ||
979 | if (err) | ||
980 | return err; | ||
981 | |||
982 | event->destroy = hw_perf_event_destroy; | ||
983 | |||
984 | /* | ||
985 | * Generate PMC IRQs: | ||
986 | * (keep 'enabled' bit clear for now) | ||
987 | */ | ||
988 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
989 | |||
990 | hwc->idx = -1; | ||
991 | |||
992 | /* | ||
993 | * Count user and OS events unless requested not to. | ||
994 | */ | ||
995 | if (!attr->exclude_user) | ||
996 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
997 | if (!attr->exclude_kernel) | ||
998 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
999 | 439 | ||
1000 | if (!hwc->sample_period) { | 440 | if (!hwc->sample_period) { |
1001 | hwc->sample_period = x86_pmu.max_period; | 441 | hwc->sample_period = x86_pmu.max_period; |
1002 | hwc->last_period = hwc->sample_period; | 442 | hwc->last_period = hwc->sample_period; |
1003 | atomic64_set(&hwc->period_left, hwc->sample_period); | 443 | local64_set(&hwc->period_left, hwc->sample_period); |
1004 | } else { | 444 | } else { |
1005 | /* | 445 | /* |
1006 | * If we have a PMU initialized but no APIC | 446 | * If we have a PMU initialized but no APIC |
@@ -1012,13 +452,8 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1012 | return -EOPNOTSUPP; | 452 | return -EOPNOTSUPP; |
1013 | } | 453 | } |
1014 | 454 | ||
1015 | /* | 455 | if (attr->type == PERF_TYPE_RAW) |
1016 | * Raw hw_event type provide the config in the hw_event structure | ||
1017 | */ | ||
1018 | if (attr->type == PERF_TYPE_RAW) { | ||
1019 | hwc->config |= x86_pmu.raw_event(attr->config); | ||
1020 | return 0; | 456 | return 0; |
1021 | } | ||
1022 | 457 | ||
1023 | if (attr->type == PERF_TYPE_HW_CACHE) | 458 | if (attr->type == PERF_TYPE_HW_CACHE) |
1024 | return set_ext_hw_attr(hwc, attr); | 459 | return set_ext_hw_attr(hwc, attr); |
@@ -1043,11 +478,11 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1043 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 478 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && |
1044 | (hwc->sample_period == 1)) { | 479 | (hwc->sample_period == 1)) { |
1045 | /* BTS is not supported by this architecture. */ | 480 | /* BTS is not supported by this architecture. */ |
1046 | if (!bts_available()) | 481 | if (!x86_pmu.bts) |
1047 | return -EOPNOTSUPP; | 482 | return -EOPNOTSUPP; |
1048 | 483 | ||
1049 | /* BTS is currently only allowed for user-mode. */ | 484 | /* BTS is currently only allowed for user-mode. */ |
1050 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | 485 | if (!attr->exclude_kernel) |
1051 | return -EOPNOTSUPP; | 486 | return -EOPNOTSUPP; |
1052 | } | 487 | } |
1053 | 488 | ||
@@ -1056,127 +491,122 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
1056 | return 0; | 491 | return 0; |
1057 | } | 492 | } |
1058 | 493 | ||
1059 | static void p6_pmu_disable_all(void) | 494 | static int x86_pmu_hw_config(struct perf_event *event) |
1060 | { | 495 | { |
1061 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 496 | if (event->attr.precise_ip) { |
1062 | u64 val; | 497 | int precise = 0; |
1063 | 498 | ||
1064 | if (!cpuc->enabled) | 499 | /* Support for constant skid */ |
1065 | return; | 500 | if (x86_pmu.pebs) |
501 | precise++; | ||
1066 | 502 | ||
1067 | cpuc->enabled = 0; | 503 | /* Support for IP fixup */ |
1068 | barrier(); | 504 | if (x86_pmu.lbr_nr) |
505 | precise++; | ||
506 | |||
507 | if (event->attr.precise_ip > precise) | ||
508 | return -EOPNOTSUPP; | ||
509 | } | ||
510 | |||
511 | /* | ||
512 | * Generate PMC IRQs: | ||
513 | * (keep 'enabled' bit clear for now) | ||
514 | */ | ||
515 | event->hw.config = ARCH_PERFMON_EVENTSEL_INT; | ||
516 | |||
517 | /* | ||
518 | * Count user and OS events unless requested not to | ||
519 | */ | ||
520 | if (!event->attr.exclude_user) | ||
521 | event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; | ||
522 | if (!event->attr.exclude_kernel) | ||
523 | event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; | ||
524 | |||
525 | if (event->attr.type == PERF_TYPE_RAW) | ||
526 | event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; | ||
1069 | 527 | ||
1070 | /* p6 only has one enable register */ | 528 | return x86_setup_perfctr(event); |
1071 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
1072 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1073 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
1074 | } | 529 | } |
1075 | 530 | ||
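The new x86_pmu_hw_config() caps the requested precise_ip level at what the PMU can actually deliver: one level when PEBS is present (constant skid) and one more when an LBR stack allows IP fixup. A toy sketch of that capability check, with made-up has_pebs/lbr_nr values, is:

#include <stdio.h>

/* mirrors the precise_ip check above; -1 stands in for -EOPNOTSUPP */
static int check_precise_ip(int requested, int has_pebs, int lbr_nr)
{
	int precise = 0;

	if (has_pebs)
		precise++;	/* constant skid */
	if (lbr_nr)
		precise++;	/* IP fixup via LBR */

	return requested <= precise ? 0 : -1;
}

int main(void)
{
	printf("precise_ip=2, PEBS only: %s\n",
	       check_precise_ip(2, 1, 0) ? "rejected" : "accepted");
	printf("precise_ip=2, PEBS+LBR:  %s\n",
	       check_precise_ip(2, 1, 16) ? "rejected" : "accepted");
	return 0;
}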
1076 | static void intel_pmu_disable_all(void) | 531 | /* |
532 | * Setup the hardware configuration for a given attr_type | ||
533 | */ | ||
534 | static int __hw_perf_event_init(struct perf_event *event) | ||
1077 | { | 535 | { |
1078 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 536 | int err; |
1079 | 537 | ||
1080 | if (!cpuc->enabled) | 538 | if (!x86_pmu_initialized()) |
1081 | return; | 539 | return -ENODEV; |
1082 | 540 | ||
1083 | cpuc->enabled = 0; | 541 | err = 0; |
1084 | barrier(); | 542 | if (!atomic_inc_not_zero(&active_events)) { |
543 | mutex_lock(&pmc_reserve_mutex); | ||
544 | if (atomic_read(&active_events) == 0) { | ||
545 | if (!reserve_pmc_hardware()) | ||
546 | err = -EBUSY; | ||
547 | else { | ||
548 | err = reserve_ds_buffers(); | ||
549 | if (err) | ||
550 | release_pmc_hardware(); | ||
551 | } | ||
552 | } | ||
553 | if (!err) | ||
554 | atomic_inc(&active_events); | ||
555 | mutex_unlock(&pmc_reserve_mutex); | ||
556 | } | ||
557 | if (err) | ||
558 | return err; | ||
1085 | 559 | ||
1086 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | 560 | event->destroy = hw_perf_event_destroy; |
1087 | 561 | ||
1088 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | 562 | event->hw.idx = -1; |
1089 | intel_pmu_disable_bts(); | 563 | event->hw.last_cpu = -1; |
564 | event->hw.last_tag = ~0ULL; | ||
565 | |||
566 | return x86_pmu.hw_config(event); | ||
1090 | } | 567 | } |
1091 | 568 | ||
1092 | static void amd_pmu_disable_all(void) | 569 | static void x86_pmu_disable_all(void) |
1093 | { | 570 | { |
1094 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 571 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1095 | int idx; | 572 | int idx; |
1096 | 573 | ||
1097 | if (!cpuc->enabled) | 574 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1098 | return; | ||
1099 | |||
1100 | cpuc->enabled = 0; | ||
1101 | /* | ||
1102 | * ensure we write the disable before we start disabling the | ||
1103 | * events proper, so that amd_pmu_enable_event() does the | ||
1104 | * right thing. | ||
1105 | */ | ||
1106 | barrier(); | ||
1107 | |||
1108 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
1109 | u64 val; | 575 | u64 val; |
1110 | 576 | ||
1111 | if (!test_bit(idx, cpuc->active_mask)) | 577 | if (!test_bit(idx, cpuc->active_mask)) |
1112 | continue; | 578 | continue; |
1113 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | 579 | rdmsrl(x86_pmu.eventsel + idx, val); |
1114 | if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) | 580 | if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) |
1115 | continue; | 581 | continue; |
1116 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | 582 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
1117 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | 583 | wrmsrl(x86_pmu.eventsel + idx, val); |
1118 | } | 584 | } |
1119 | } | 585 | } |
1120 | 586 | ||
1121 | void hw_perf_disable(void) | 587 | void hw_perf_disable(void) |
1122 | { | 588 | { |
1123 | if (!x86_pmu_initialized()) | ||
1124 | return; | ||
1125 | return x86_pmu.disable_all(); | ||
1126 | } | ||
1127 | |||
1128 | static void p6_pmu_enable_all(void) | ||
1129 | { | ||
1130 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 589 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1131 | unsigned long val; | ||
1132 | 590 | ||
1133 | if (cpuc->enabled) | 591 | if (!x86_pmu_initialized()) |
1134 | return; | 592 | return; |
1135 | 593 | ||
1136 | cpuc->enabled = 1; | 594 | if (!cpuc->enabled) |
1137 | barrier(); | ||
1138 | |||
1139 | /* p6 only has one enable register */ | ||
1140 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
1141 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1142 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
1143 | } | ||
1144 | |||
1145 | static void intel_pmu_enable_all(void) | ||
1146 | { | ||
1147 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1148 | |||
1149 | if (cpuc->enabled) | ||
1150 | return; | 595 | return; |
1151 | 596 | ||
1152 | cpuc->enabled = 1; | 597 | cpuc->n_added = 0; |
598 | cpuc->enabled = 0; | ||
1153 | barrier(); | 599 | barrier(); |
1154 | 600 | ||
1155 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 601 | x86_pmu.disable_all(); |
1156 | |||
1157 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
1158 | struct perf_event *event = | ||
1159 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
1160 | |||
1161 | if (WARN_ON_ONCE(!event)) | ||
1162 | return; | ||
1163 | |||
1164 | intel_pmu_enable_bts(event->hw.config); | ||
1165 | } | ||
1166 | } | 602 | } |
1167 | 603 | ||
1168 | static void amd_pmu_enable_all(void) | 604 | static void x86_pmu_enable_all(int added) |
1169 | { | 605 | { |
1170 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 606 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1171 | int idx; | 607 | int idx; |
1172 | 608 | ||
1173 | if (cpuc->enabled) | 609 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1174 | return; | ||
1175 | |||
1176 | cpuc->enabled = 1; | ||
1177 | barrier(); | ||
1178 | |||
1179 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
1180 | struct perf_event *event = cpuc->events[idx]; | 610 | struct perf_event *event = cpuc->events[idx]; |
1181 | u64 val; | 611 | u64 val; |
1182 | 612 | ||
@@ -1184,88 +614,267 @@ static void amd_pmu_enable_all(void) | |||
1184 | continue; | 614 | continue; |
1185 | 615 | ||
1186 | val = event->hw.config; | 616 | val = event->hw.config; |
1187 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 617 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
1188 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | 618 | wrmsrl(x86_pmu.eventsel + idx, val); |
1189 | } | 619 | } |
1190 | } | 620 | } |
1191 | 621 | ||
1192 | void hw_perf_enable(void) | 622 | static const struct pmu pmu; |
623 | |||
624 | static inline int is_x86_event(struct perf_event *event) | ||
1193 | { | 625 | { |
1194 | if (!x86_pmu_initialized()) | 626 | return event->pmu == &pmu; |
1195 | return; | ||
1196 | x86_pmu.enable_all(); | ||
1197 | } | 627 | } |
1198 | 628 | ||
1199 | static inline u64 intel_pmu_get_status(void) | 629 | static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) |
1200 | { | 630 | { |
1201 | u64 status; | 631 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; |
632 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
633 | int i, j, w, wmax, num = 0; | ||
634 | struct hw_perf_event *hwc; | ||
1202 | 635 | ||
1203 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 636 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
1204 | 637 | ||
1205 | return status; | 638 | for (i = 0; i < n; i++) { |
1206 | } | 639 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); |
640 | constraints[i] = c; | ||
641 | } | ||
1207 | 642 | ||
1208 | static inline void intel_pmu_ack_status(u64 ack) | 643 | /* |
1209 | { | 644 | * fastpath, try to reuse previous register |
1210 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | 645 | */ |
1211 | } | 646 | for (i = 0; i < n; i++) { |
647 | hwc = &cpuc->event_list[i]->hw; | ||
648 | c = constraints[i]; | ||
1212 | 649 | ||
1213 | static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) | 650 | /* never assigned */ |
1214 | { | 651 | if (hwc->idx == -1) |
1215 | (void)checking_wrmsrl(hwc->config_base + idx, | 652 | break; |
1216 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); | ||
1217 | } | ||
1218 | 653 | ||
1219 | static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) | 654 | /* constraint still honored */ |
1220 | { | 655 | if (!test_bit(hwc->idx, c->idxmsk)) |
1221 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); | 656 | break; |
657 | |||
658 | /* not already used */ | ||
659 | if (test_bit(hwc->idx, used_mask)) | ||
660 | break; | ||
661 | |||
662 | __set_bit(hwc->idx, used_mask); | ||
663 | if (assign) | ||
664 | assign[i] = hwc->idx; | ||
665 | } | ||
666 | if (i == n) | ||
667 | goto done; | ||
668 | |||
669 | /* | ||
670 | * begin slow path | ||
671 | */ | ||
672 | |||
673 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
674 | |||
675 | /* | ||
676 | * weight = number of possible counters | ||
677 | * | ||
678 | * 1 = most constrained, only works on one counter | ||
679 | * wmax = least constrained, works on any counter | ||
680 | * | ||
681 | * assign events to counters starting with most | ||
682 | * constrained events. | ||
683 | */ | ||
684 | wmax = x86_pmu.num_counters; | ||
685 | |||
686 | /* | ||
687 | * when fixed event counters are present, | ||
688 | * wmax is incremented by 1 to account | ||
689 | * for one more choice | ||
690 | */ | ||
691 | if (x86_pmu.num_counters_fixed) | ||
692 | wmax++; | ||
693 | |||
694 | for (w = 1, num = n; num && w <= wmax; w++) { | ||
695 | /* for each event */ | ||
696 | for (i = 0; num && i < n; i++) { | ||
697 | c = constraints[i]; | ||
698 | hwc = &cpuc->event_list[i]->hw; | ||
699 | |||
700 | if (c->weight != w) | ||
701 | continue; | ||
702 | |||
703 | for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { | ||
704 | if (!test_bit(j, used_mask)) | ||
705 | break; | ||
706 | } | ||
707 | |||
708 | if (j == X86_PMC_IDX_MAX) | ||
709 | break; | ||
710 | |||
711 | __set_bit(j, used_mask); | ||
712 | |||
713 | if (assign) | ||
714 | assign[i] = j; | ||
715 | num--; | ||
716 | } | ||
717 | } | ||
718 | done: | ||
719 | /* | ||
720 | * scheduling failed or is just a simulation, | ||
721 | * free resources if necessary | ||
722 | */ | ||
723 | if (!assign || num) { | ||
724 | for (i = 0; i < n; i++) { | ||
725 | if (x86_pmu.put_event_constraints) | ||
726 | x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); | ||
727 | } | ||
728 | } | ||
729 | return num ? -ENOSPC : 0; | ||
1222 | } | 730 | } |
1223 | 731 | ||
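The slow path of x86_schedule_events() above assigns events in order of increasing weight (the number of counters an event is allowed to use), so the most constrained events pick first and a flexible event cannot steal the only counter a constrained one could run on. A stand-alone sketch of that greedy pass, with invented constraint masks and gcc's popcount builtin, is:

#include <stdio.h>

#define NR_COUNTERS 4

struct fake_event {
	const char	*name;
	unsigned int	idxmsk;		/* allowed counters, one bit each */
};

int main(void)
{
	struct fake_event ev[] = {
		{ "flexible",  0xf },			/* any of the 4 counters */
		{ "ctr2-only", 1u << 2 },		/* most constrained       */
		{ "ctr0-or-2", (1u << 0) | (1u << 2) },
	};
	int n = sizeof(ev) / sizeof(ev[0]);
	unsigned int used = 0;
	int w, i, j;

	/* outer loop: weight 1 (most constrained) .. NR_COUNTERS */
	for (w = 1; w <= NR_COUNTERS; w++) {
		for (i = 0; i < n; i++) {
			if (__builtin_popcount(ev[i].idxmsk) != w)
				continue;
			/* first free counter permitted by the constraint */
			for (j = 0; j < NR_COUNTERS; j++) {
				if ((ev[i].idxmsk & (1u << j)) && !(used & (1u << j)))
					break;
			}
			if (j == NR_COUNTERS) {
				printf("%s: no free counter\n", ev[i].name);
				return 1;
			}
			used |= 1u << j;
			printf("%s -> counter %d\n", ev[i].name, j);
		}
	}
	return 0;
}

Running it places ctr2-only on counter 2 and ctr0-or-2 on counter 0 before the flexible event is considered, which is exactly the property the weight ordering is meant to guarantee.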
1224 | static inline void | 732 | /* |
1225 | intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) | 733 | * dogrp: true if must collect siblings events (group) |
734 | * returns total number of events and error code | ||
735 | */ | ||
736 | static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) | ||
1226 | { | 737 | { |
1227 | int idx = __idx - X86_PMC_IDX_FIXED; | 738 | struct perf_event *event; |
1228 | u64 ctrl_val, mask; | 739 | int n, max_count; |
1229 | 740 | ||
1230 | mask = 0xfULL << (idx * 4); | 741 | max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; |
1231 | 742 | ||
1232 | rdmsrl(hwc->config_base, ctrl_val); | 743 | /* current number of events already accepted */ |
1233 | ctrl_val &= ~mask; | 744 | n = cpuc->n_events; |
1234 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | 745 | |
746 | if (is_x86_event(leader)) { | ||
747 | if (n >= max_count) | ||
748 | return -ENOSPC; | ||
749 | cpuc->event_list[n] = leader; | ||
750 | n++; | ||
751 | } | ||
752 | if (!dogrp) | ||
753 | return n; | ||
754 | |||
755 | list_for_each_entry(event, &leader->sibling_list, group_entry) { | ||
756 | if (!is_x86_event(event) || | ||
757 | event->state <= PERF_EVENT_STATE_OFF) | ||
758 | continue; | ||
759 | |||
760 | if (n >= max_count) | ||
761 | return -ENOSPC; | ||
762 | |||
763 | cpuc->event_list[n] = event; | ||
764 | n++; | ||
765 | } | ||
766 | return n; | ||
1235 | } | 767 | } |
1236 | 768 | ||
1237 | static inline void | 769 | static inline void x86_assign_hw_event(struct perf_event *event, |
1238 | p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) | 770 | struct cpu_hw_events *cpuc, int i) |
1239 | { | 771 | { |
1240 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 772 | struct hw_perf_event *hwc = &event->hw; |
1241 | u64 val = P6_NOP_EVENT; | ||
1242 | 773 | ||
1243 | if (cpuc->enabled) | 774 | hwc->idx = cpuc->assign[i]; |
1244 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 775 | hwc->last_cpu = smp_processor_id(); |
776 | hwc->last_tag = ++cpuc->tags[i]; | ||
1245 | 777 | ||
1246 | (void)checking_wrmsrl(hwc->config_base + idx, val); | 778 | if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { |
779 | hwc->config_base = 0; | ||
780 | hwc->event_base = 0; | ||
781 | } else if (hwc->idx >= X86_PMC_IDX_FIXED) { | ||
782 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | ||
783 | /* | ||
784 | * We set it so that event_base + idx in wrmsr/rdmsr maps to | ||
785 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: | ||
786 | */ | ||
787 | hwc->event_base = | ||
788 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; | ||
789 | } else { | ||
790 | hwc->config_base = x86_pmu.eventsel; | ||
791 | hwc->event_base = x86_pmu.perfctr; | ||
792 | } | ||
1247 | } | 793 | } |
1248 | 794 | ||
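The fixed-counter branch of x86_assign_hw_event() above relies on an offset trick: event_base is set to MSR_ARCH_PERFMON_FIXED_CTR0 minus X86_PMC_IDX_FIXED, so the common event_base + idx rdmsr/wrmsr pattern lands on FIXED_CTR0..CTR2 for fixed indices without any special-casing. A quick sketch of the arithmetic (the constants are the usual architectural values, shown here purely for illustration):

#include <stdio.h>

#define MSR_ARCH_PERFMON_FIXED_CTR0	0x309
#define X86_PMC_IDX_FIXED		32

int main(void)
{
	unsigned int event_base = MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
	int idx;

	/* fixed indices 32..34 map onto MSRs 0x309..0x30b */
	for (idx = X86_PMC_IDX_FIXED; idx < X86_PMC_IDX_FIXED + 3; idx++)
		printf("idx %d -> MSR 0x%x\n", idx, event_base + idx);

	return 0;
}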
1249 | static inline void | 795 | static inline int match_prev_assignment(struct hw_perf_event *hwc, |
1250 | intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) | 796 | struct cpu_hw_events *cpuc, |
797 | int i) | ||
1251 | { | 798 | { |
1252 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | 799 | return hwc->idx == cpuc->assign[i] && |
1253 | intel_pmu_disable_bts(); | 800 | hwc->last_cpu == smp_processor_id() && |
801 | hwc->last_tag == cpuc->tags[i]; | ||
802 | } | ||
803 | |||
804 | static int x86_pmu_start(struct perf_event *event); | ||
805 | static void x86_pmu_stop(struct perf_event *event); | ||
806 | |||
807 | void hw_perf_enable(void) | ||
808 | { | ||
809 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
810 | struct perf_event *event; | ||
811 | struct hw_perf_event *hwc; | ||
812 | int i, added = cpuc->n_added; | ||
813 | |||
814 | if (!x86_pmu_initialized()) | ||
1254 | return; | 815 | return; |
1255 | } | ||
1256 | 816 | ||
1257 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 817 | if (cpuc->enabled) |
1258 | intel_pmu_disable_fixed(hwc, idx); | ||
1259 | return; | 818 | return; |
819 | |||
820 | if (cpuc->n_added) { | ||
821 | int n_running = cpuc->n_events - cpuc->n_added; | ||
822 | /* | ||
823 | * apply assignment obtained either from | ||
824 | * hw_perf_group_sched_in() or x86_pmu_enable() | ||
825 | * | ||
826 | * step1: save events moving to new counters | ||
827 | * step2: reprogram moved events into new counters | ||
828 | */ | ||
829 | for (i = 0; i < n_running; i++) { | ||
830 | event = cpuc->event_list[i]; | ||
831 | hwc = &event->hw; | ||
832 | |||
833 | /* | ||
834 | * we can avoid reprogramming counter if: | ||
835 | * - assigned same counter as last time | ||
836 | * - running on same CPU as last time | ||
837 | * - no other event has used the counter since | ||
838 | */ | ||
839 | if (hwc->idx == -1 || | ||
840 | match_prev_assignment(hwc, cpuc, i)) | ||
841 | continue; | ||
842 | |||
843 | x86_pmu_stop(event); | ||
844 | } | ||
845 | |||
846 | for (i = 0; i < cpuc->n_events; i++) { | ||
847 | event = cpuc->event_list[i]; | ||
848 | hwc = &event->hw; | ||
849 | |||
850 | if (!match_prev_assignment(hwc, cpuc, i)) | ||
851 | x86_assign_hw_event(event, cpuc, i); | ||
852 | else if (i < n_running) | ||
853 | continue; | ||
854 | |||
855 | x86_pmu_start(event); | ||
856 | } | ||
857 | cpuc->n_added = 0; | ||
858 | perf_events_lapic_init(); | ||
1260 | } | 859 | } |
1261 | 860 | ||
1262 | x86_pmu_disable_event(hwc, idx); | 861 | cpuc->enabled = 1; |
862 | barrier(); | ||
863 | |||
864 | x86_pmu.enable_all(added); | ||
1263 | } | 865 | } |
1264 | 866 | ||
1265 | static inline void | 867 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, |
1266 | amd_pmu_disable_event(struct hw_perf_event *hwc, int idx) | 868 | u64 enable_mask) |
1267 | { | 869 | { |
1268 | x86_pmu_disable_event(hwc, idx); | 870 | wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); |
871 | } | ||
872 | |||
873 | static inline void x86_pmu_disable_event(struct perf_event *event) | ||
874 | { | ||
875 | struct hw_perf_event *hwc = &event->hw; | ||
876 | |||
877 | wrmsrl(hwc->config_base + hwc->idx, hwc->config); | ||
1269 | } | 878 | } |
1270 | 879 | ||
1271 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 880 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
@@ -1275,12 +884,12 @@ static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | |||
1275 | * To be called with the event disabled in hw: | 884 | * To be called with the event disabled in hw: |
1276 | */ | 885 | */ |
1277 | static int | 886 | static int |
1278 | x86_perf_event_set_period(struct perf_event *event, | 887 | x86_perf_event_set_period(struct perf_event *event) |
1279 | struct hw_perf_event *hwc, int idx) | ||
1280 | { | 888 | { |
1281 | s64 left = atomic64_read(&hwc->period_left); | 889 | struct hw_perf_event *hwc = &event->hw; |
890 | s64 left = local64_read(&hwc->period_left); | ||
1282 | s64 period = hwc->sample_period; | 891 | s64 period = hwc->sample_period; |
1283 | int err, ret = 0; | 892 | int ret = 0, idx = hwc->idx; |
1284 | 893 | ||
1285 | if (idx == X86_PMC_IDX_FIXED_BTS) | 894 | if (idx == X86_PMC_IDX_FIXED_BTS) |
1286 | return 0; | 895 | return 0; |
@@ -1290,14 +899,14 @@ x86_perf_event_set_period(struct perf_event *event, | |||
1290 | */ | 899 | */ |
1291 | if (unlikely(left <= -period)) { | 900 | if (unlikely(left <= -period)) { |
1292 | left = period; | 901 | left = period; |
1293 | atomic64_set(&hwc->period_left, left); | 902 | local64_set(&hwc->period_left, left); |
1294 | hwc->last_period = period; | 903 | hwc->last_period = period; |
1295 | ret = 1; | 904 | ret = 1; |
1296 | } | 905 | } |
1297 | 906 | ||
1298 | if (unlikely(left <= 0)) { | 907 | if (unlikely(left <= 0)) { |
1299 | left += period; | 908 | left += period; |
1300 | atomic64_set(&hwc->period_left, left); | 909 | local64_set(&hwc->period_left, left); |
1301 | hwc->last_period = period; | 910 | hwc->last_period = period; |
1302 | ret = 1; | 911 | ret = 1; |
1303 | } | 912 | } |
@@ -1316,229 +925,94 @@ x86_perf_event_set_period(struct perf_event *event, | |||
1316 | * The hw event starts counting from this event offset, | 925 | * The hw event starts counting from this event offset, |
1317 | * mark it to be able to extract future deltas: | 926 | * mark it to be able to extract future deltas: |
1318 | */ | 927 | */ |
1319 | atomic64_set(&hwc->prev_count, (u64)-left); | 928 | local64_set(&hwc->prev_count, (u64)-left); |
1320 | |||
1321 | err = checking_wrmsrl(hwc->event_base + idx, | ||
1322 | (u64)(-left) & x86_pmu.event_mask); | ||
1323 | 929 | ||
1324 | perf_event_update_userpage(event); | 930 | wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); |
1325 | |||
1326 | return ret; | ||
1327 | } | ||
1328 | |||
1329 | static inline void | ||
1330 | intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) | ||
1331 | { | ||
1332 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
1333 | u64 ctrl_val, bits, mask; | ||
1334 | int err; | ||
1335 | 931 | ||
1336 | /* | 932 | /* |
1337 | * Enable IRQ generation (0x8), | 933 | * Due to an erratum on certain CPUs we need |
1338 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | 934 | * a second write to be sure the register |
1339 | * if requested: | 935 | * is updated properly |
1340 | */ | 936 | */ |
1341 | bits = 0x8ULL; | 937 | if (x86_pmu.perfctr_second_write) { |
1342 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | 938 | wrmsrl(hwc->event_base + idx, |
1343 | bits |= 0x2; | 939 | (u64)(-left) & x86_pmu.cntval_mask); |
1344 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
1345 | bits |= 0x1; | ||
1346 | |||
1347 | /* | ||
1348 | * ANY bit is supported in v3 and up | ||
1349 | */ | ||
1350 | if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) | ||
1351 | bits |= 0x4; | ||
1352 | |||
1353 | bits <<= (idx * 4); | ||
1354 | mask = 0xfULL << (idx * 4); | ||
1355 | |||
1356 | rdmsrl(hwc->config_base, ctrl_val); | ||
1357 | ctrl_val &= ~mask; | ||
1358 | ctrl_val |= bits; | ||
1359 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
1360 | } | ||
1361 | |||
1362 | static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1363 | { | ||
1364 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1365 | u64 val; | ||
1366 | |||
1367 | val = hwc->config; | ||
1368 | if (cpuc->enabled) | ||
1369 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1370 | |||
1371 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1372 | } | ||
1373 | |||
1374 | |||
1375 | static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1376 | { | ||
1377 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1378 | if (!__get_cpu_var(cpu_hw_events).enabled) | ||
1379 | return; | ||
1380 | |||
1381 | intel_pmu_enable_bts(hwc->config); | ||
1382 | return; | ||
1383 | } | 940 | } |
1384 | 941 | ||
1385 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 942 | perf_event_update_userpage(event); |
1386 | intel_pmu_enable_fixed(hwc, idx); | ||
1387 | return; | ||
1388 | } | ||
1389 | 943 | ||
1390 | x86_pmu_enable_event(hwc, idx); | 944 | return ret; |
1391 | } | 945 | } |
1392 | 946 | ||
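x86_perf_event_set_period() above re-arms period_left when it has gone negative (an overflow that was serviced late), clamps it to max_period, and programs the counter with -left so the next overflow fires after exactly left more events; the perfctr_second_write path then repeats the write on parts with the write-ordering erratum. A user-space sketch of the bookkeeping, with an example max_period and counter width, is:

#include <stdio.h>
#include <stdint.h>

/* mirrors the period handling above; max_period and cntval_bits are
 * illustrative, not taken from any particular PMU */
static uint64_t next_counter_value(int64_t *period_left, int64_t period,
				   int64_t max_period, int cntval_bits)
{
	int64_t left = *period_left;

	if (left <= -period)		/* far behind: restart a full period */
		left = period;
	if (left <= 0)			/* slightly behind: catch up */
		left += period;
	if (left > max_period)		/* hardware limit */
		left = max_period;

	*period_left = left;

	/* program the counter so it overflows after "left" events */
	return (uint64_t)(-left) & ((1ULL << cntval_bits) - 1);
}

int main(void)
{
	int64_t left = -100;		/* overflow handled a bit late */
	uint64_t val = next_counter_value(&left, 100000, (1LL << 47) - 1, 48);

	printf("left=%lld, counter=0x%llx\n",
	       (long long)left, (unsigned long long)val);
	return 0;
}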
1393 | static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) | 947 | static void x86_pmu_enable_event(struct perf_event *event) |
1394 | { | 948 | { |
1395 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 949 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1396 | |||
1397 | if (cpuc->enabled) | 950 | if (cpuc->enabled) |
1398 | x86_pmu_enable_event(hwc, idx); | 951 | __x86_pmu_enable_event(&event->hw, |
1399 | } | 952 | ARCH_PERFMON_EVENTSEL_ENABLE); |
1400 | |||
1401 | static int fixed_mode_idx(struct hw_perf_event *hwc) | ||
1402 | { | ||
1403 | unsigned int hw_event; | ||
1404 | |||
1405 | hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
1406 | |||
1407 | if (unlikely((hw_event == | ||
1408 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | ||
1409 | (hwc->sample_period == 1))) | ||
1410 | return X86_PMC_IDX_FIXED_BTS; | ||
1411 | |||
1412 | if (!x86_pmu.num_events_fixed) | ||
1413 | return -1; | ||
1414 | |||
1415 | /* | ||
1416 | * fixed counters do not take all possible filters | ||
1417 | */ | ||
1418 | if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) | ||
1419 | return -1; | ||
1420 | |||
1421 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | ||
1422 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | ||
1423 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | ||
1424 | return X86_PMC_IDX_FIXED_CPU_CYCLES; | ||
1425 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) | ||
1426 | return X86_PMC_IDX_FIXED_BUS_CYCLES; | ||
1427 | |||
1428 | return -1; | ||
1429 | } | 953 | } |
1430 | 954 | ||
1431 | /* | 955 | /* |
1432 | * generic counter allocator: get next free counter | 956 | * activate a single event |
1433 | */ | 957 | * |
1434 | static int | 958 | * The event is added to the group of enabled events |
1435 | gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | 959 | * but only if it can be scehduled with existing events. |
1436 | { | 960 | * |
1437 | int idx; | 961 | * Called with PMU disabled. If successful and return value 1, |
1438 | 962 | * then guaranteed to call perf_enable() and hw_perf_enable() | |
1439 | idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); | ||
1440 | return idx == x86_pmu.num_events ? -1 : idx; | ||
1441 | } | ||
1442 | |||
1443 | /* | ||
1444 | * intel-specific counter allocator: check event constraints | ||
1445 | */ | 963 | */ |
1446 | static int | 964 | static int x86_pmu_enable(struct perf_event *event) |
1447 | intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | ||
1448 | { | 965 | { |
1449 | const struct event_constraint *event_constraint; | 966 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1450 | int i, code; | 967 | struct hw_perf_event *hwc; |
1451 | 968 | int assign[X86_PMC_IDX_MAX]; | |
1452 | if (!event_constraints) | 969 | int n, n0, ret; |
1453 | goto skip; | ||
1454 | 970 | ||
1455 | code = hwc->config & CORE_EVNTSEL_EVENT_MASK; | 971 | hwc = &event->hw; |
1456 | 972 | ||
1457 | for_each_event_constraint(event_constraint, event_constraints) { | 973 | n0 = cpuc->n_events; |
1458 | if (code == event_constraint->code) { | 974 | n = collect_events(cpuc, event, false); |
1459 | for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { | 975 | if (n < 0) |
1460 | if (!test_and_set_bit(i, cpuc->used_mask)) | 976 | return n; |
1461 | return i; | ||
1462 | } | ||
1463 | return -1; | ||
1464 | } | ||
1465 | } | ||
1466 | skip: | ||
1467 | return gen_get_event_idx(cpuc, hwc); | ||
1468 | } | ||
1469 | 977 | ||
1470 | static int | 978 | /* |
1471 | x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) | 979 | * If group events scheduling transaction was started, |
1472 | { | 980 | * skip the schedulability test here, it will be peformed |
1473 | int idx; | 981 | * at commit time(->commit_txn) as a whole |
982 | */ | ||
983 | if (cpuc->group_flag & PERF_EVENT_TXN) | ||
984 | goto out; | ||
1474 | 985 | ||
1475 | idx = fixed_mode_idx(hwc); | 986 | ret = x86_pmu.schedule_events(cpuc, n, assign); |
1476 | if (idx == X86_PMC_IDX_FIXED_BTS) { | 987 | if (ret) |
1477 | /* BTS is already occupied. */ | 988 | return ret; |
1478 | if (test_and_set_bit(idx, cpuc->used_mask)) | 989 | /* |
1479 | return -EAGAIN; | 990 | * copy new assignment, now we know it is possible |
991 | * will be used by hw_perf_enable() | ||
992 | */ | ||
993 | memcpy(cpuc->assign, assign, n*sizeof(int)); | ||
1480 | 994 | ||
1481 | hwc->config_base = 0; | 995 | out: |
1482 | hwc->event_base = 0; | 996 | cpuc->n_events = n; |
1483 | hwc->idx = idx; | 997 | cpuc->n_added += n - n0; |
1484 | } else if (idx >= 0) { | 998 | cpuc->n_txn += n - n0; |
1485 | /* | ||
1486 | * Try to get the fixed event, if that is already taken | ||
1487 | * then try to get a generic event: | ||
1488 | */ | ||
1489 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
1490 | goto try_generic; | ||
1491 | 999 | ||
1492 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | 1000 | return 0; |
1493 | /* | ||
1494 | * We set it so that event_base + idx in wrmsr/rdmsr maps to | ||
1495 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: | ||
1496 | */ | ||
1497 | hwc->event_base = | ||
1498 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; | ||
1499 | hwc->idx = idx; | ||
1500 | } else { | ||
1501 | idx = hwc->idx; | ||
1502 | /* Try to get the previous generic event again */ | ||
1503 | if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { | ||
1504 | try_generic: | ||
1505 | idx = x86_pmu.get_event_idx(cpuc, hwc); | ||
1506 | if (idx == -1) | ||
1507 | return -EAGAIN; | ||
1508 | |||
1509 | set_bit(idx, cpuc->used_mask); | ||
1510 | hwc->idx = idx; | ||
1511 | } | ||
1512 | hwc->config_base = x86_pmu.eventsel; | ||
1513 | hwc->event_base = x86_pmu.perfctr; | ||
1514 | } | ||
1515 | |||
1516 | return idx; | ||
1517 | } | 1001 | } |
1518 | 1002 | ||
1519 | /* | 1003 | static int x86_pmu_start(struct perf_event *event) |
1520 | * Find a PMC slot for the freshly enabled / scheduled in event: | ||
1521 | */ | ||
1522 | static int x86_pmu_enable(struct perf_event *event) | ||
1523 | { | 1004 | { |
1524 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1005 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1525 | struct hw_perf_event *hwc = &event->hw; | 1006 | int idx = event->hw.idx; |
1526 | int idx; | ||
1527 | 1007 | ||
1528 | idx = x86_schedule_event(cpuc, hwc); | 1008 | if (idx == -1) |
1529 | if (idx < 0) | 1009 | return -EAGAIN; |
1530 | return idx; | ||
1531 | |||
1532 | perf_events_lapic_init(); | ||
1533 | |||
1534 | x86_pmu.disable(hwc, idx); | ||
1535 | 1010 | ||
1011 | x86_perf_event_set_period(event); | ||
1536 | cpuc->events[idx] = event; | 1012 | cpuc->events[idx] = event; |
1537 | set_bit(idx, cpuc->active_mask); | 1013 | __set_bit(idx, cpuc->active_mask); |
1538 | 1014 | __set_bit(idx, cpuc->running); | |
1539 | x86_perf_event_set_period(event, hwc, idx); | 1015 | x86_pmu.enable(event); |
1540 | x86_pmu.enable(hwc, idx); | ||
1541 | |||
1542 | perf_event_update_userpage(event); | 1016 | perf_event_update_userpage(event); |
1543 | 1017 | ||
1544 | return 0; | 1018 | return 0; |
@@ -1546,24 +1020,19 @@ static int x86_pmu_enable(struct perf_event *event) | |||
1546 | 1020 | ||
1547 | static void x86_pmu_unthrottle(struct perf_event *event) | 1021 | static void x86_pmu_unthrottle(struct perf_event *event) |
1548 | { | 1022 | { |
1549 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1023 | int ret = x86_pmu_start(event); |
1550 | struct hw_perf_event *hwc = &event->hw; | 1024 | WARN_ON_ONCE(ret); |
1551 | |||
1552 | if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || | ||
1553 | cpuc->events[hwc->idx] != event)) | ||
1554 | return; | ||
1555 | |||
1556 | x86_pmu.enable(hwc, hwc->idx); | ||
1557 | } | 1025 | } |
1558 | 1026 | ||
1559 | void perf_event_print_debug(void) | 1027 | void perf_event_print_debug(void) |
1560 | { | 1028 | { |
1561 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | 1029 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; |
1030 | u64 pebs; | ||
1562 | struct cpu_hw_events *cpuc; | 1031 | struct cpu_hw_events *cpuc; |
1563 | unsigned long flags; | 1032 | unsigned long flags; |
1564 | int cpu, idx; | 1033 | int cpu, idx; |
1565 | 1034 | ||
1566 | if (!x86_pmu.num_events) | 1035 | if (!x86_pmu.num_counters) |
1567 | return; | 1036 | return; |
1568 | 1037 | ||
1569 | local_irq_save(flags); | 1038 | local_irq_save(flags); |
@@ -1576,16 +1045,18 @@ void perf_event_print_debug(void) | |||
1576 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 1045 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); |
1577 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | 1046 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); |
1578 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | 1047 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); |
1048 | rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); | ||
1579 | 1049 | ||
1580 | pr_info("\n"); | 1050 | pr_info("\n"); |
1581 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | 1051 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); |
1582 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | 1052 | pr_info("CPU#%d: status: %016llx\n", cpu, status); |
1583 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | 1053 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); |
1584 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | 1054 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); |
1055 | pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); | ||
1585 | } | 1056 | } |
1586 | pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); | 1057 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
1587 | 1058 | ||
1588 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1059 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1589 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1060 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
1590 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | 1061 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); |
1591 | 1062 | ||
@@ -1598,7 +1069,7 @@ void perf_event_print_debug(void) | |||
1598 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", | 1069 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", |
1599 | cpu, idx, prev_left); | 1070 | cpu, idx, prev_left); |
1600 | } | 1071 | } |
1601 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 1072 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { |
1602 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); | 1073 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); |
1603 | 1074 | ||
1604 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", | 1075 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", |
@@ -1607,257 +1078,58 @@ void perf_event_print_debug(void) | |||
1607 | local_irq_restore(flags); | 1078 | local_irq_restore(flags); |
1608 | } | 1079 | } |
1609 | 1080 | ||
1610 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) | 1081 | static void x86_pmu_stop(struct perf_event *event) |
1611 | { | ||
1612 | struct debug_store *ds = cpuc->ds; | ||
1613 | struct bts_record { | ||
1614 | u64 from; | ||
1615 | u64 to; | ||
1616 | u64 flags; | ||
1617 | }; | ||
1618 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
1619 | struct bts_record *at, *top; | ||
1620 | struct perf_output_handle handle; | ||
1621 | struct perf_event_header header; | ||
1622 | struct perf_sample_data data; | ||
1623 | struct pt_regs regs; | ||
1624 | |||
1625 | if (!event) | ||
1626 | return; | ||
1627 | |||
1628 | if (!ds) | ||
1629 | return; | ||
1630 | |||
1631 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
1632 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
1633 | |||
1634 | if (top <= at) | ||
1635 | return; | ||
1636 | |||
1637 | ds->bts_index = ds->bts_buffer_base; | ||
1638 | |||
1639 | |||
1640 | data.period = event->hw.last_period; | ||
1641 | data.addr = 0; | ||
1642 | data.raw = NULL; | ||
1643 | regs.ip = 0; | ||
1644 | |||
1645 | /* | ||
1646 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
1647 | * We will overwrite the from and to address before we output | ||
1648 | * the sample. | ||
1649 | */ | ||
1650 | perf_prepare_sample(&header, &data, event, ®s); | ||
1651 | |||
1652 | if (perf_output_begin(&handle, event, | ||
1653 | header.size * (top - at), 1, 1)) | ||
1654 | return; | ||
1655 | |||
1656 | for (; at < top; at++) { | ||
1657 | data.ip = at->from; | ||
1658 | data.addr = at->to; | ||
1659 | |||
1660 | perf_output_sample(&handle, &header, &data, event); | ||
1661 | } | ||
1662 | |||
1663 | perf_output_end(&handle); | ||
1664 | |||
1665 | /* There's new data available. */ | ||
1666 | event->hw.interrupts++; | ||
1667 | event->pending_kill = POLL_IN; | ||
1668 | } | ||
1669 | |||
1670 | static void x86_pmu_disable(struct perf_event *event) | ||
1671 | { | 1082 | { |
1672 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1083 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1673 | struct hw_perf_event *hwc = &event->hw; | 1084 | struct hw_perf_event *hwc = &event->hw; |
1674 | int idx = hwc->idx; | 1085 | int idx = hwc->idx; |
1675 | 1086 | ||
1676 | /* | 1087 | if (!__test_and_clear_bit(idx, cpuc->active_mask)) |
1677 | * Must be done before we disable, otherwise the nmi handler | 1088 | return; |
1678 | * could reenable again: | ||
1679 | */ | ||
1680 | clear_bit(idx, cpuc->active_mask); | ||
1681 | x86_pmu.disable(hwc, idx); | ||
1682 | 1089 | ||
1683 | /* | 1090 | x86_pmu.disable(event); |
1684 | * Make sure the cleared pointer becomes visible before we | ||
1685 | * (potentially) free the event: | ||
1686 | */ | ||
1687 | barrier(); | ||
1688 | 1091 | ||
1689 | /* | 1092 | /* |
1690 | * Drain the remaining delta count out of a event | 1093 | * Drain the remaining delta count out of a event |
1691 | * that we are disabling: | 1094 | * that we are disabling: |
1692 | */ | 1095 | */ |
1693 | x86_perf_event_update(event, hwc, idx); | 1096 | x86_perf_event_update(event); |
1694 | |||
1695 | /* Drain the remaining BTS records. */ | ||
1696 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) | ||
1697 | intel_pmu_drain_bts_buffer(cpuc); | ||
1698 | 1097 | ||
1699 | cpuc->events[idx] = NULL; | 1098 | cpuc->events[idx] = NULL; |
1700 | clear_bit(idx, cpuc->used_mask); | ||
1701 | |||
1702 | perf_event_update_userpage(event); | ||
1703 | } | ||
1704 | |||
1705 | /* | ||
1706 | * Save and restart an expired event. Called by NMI contexts, | ||
1707 | * so it has to be careful about preempting normal event ops: | ||
1708 | */ | ||
1709 | static int intel_pmu_save_and_restart(struct perf_event *event) | ||
1710 | { | ||
1711 | struct hw_perf_event *hwc = &event->hw; | ||
1712 | int idx = hwc->idx; | ||
1713 | int ret; | ||
1714 | |||
1715 | x86_perf_event_update(event, hwc, idx); | ||
1716 | ret = x86_perf_event_set_period(event, hwc, idx); | ||
1717 | |||
1718 | if (event->state == PERF_EVENT_STATE_ACTIVE) | ||
1719 | intel_pmu_enable_event(hwc, idx); | ||
1720 | |||
1721 | return ret; | ||
1722 | } | 1099 | } |
1723 | 1100 | ||
1724 | static void intel_pmu_reset(void) | 1101 | static void x86_pmu_disable(struct perf_event *event) |
1725 | { | 1102 | { |
1726 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; | 1103 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1727 | unsigned long flags; | 1104 | int i; |
1728 | int idx; | ||
1729 | 1105 | ||
1730 | if (!x86_pmu.num_events) | 1106 | /* |
1107 | * If we're called during a txn, we don't need to do anything. | ||
1108 | * The events never got scheduled and ->cancel_txn will truncate | ||
1109 | * the event_list. | ||
1110 | */ | ||
1111 | if (cpuc->group_flag & PERF_EVENT_TXN) | ||
1731 | return; | 1112 | return; |
1732 | 1113 | ||
1733 | local_irq_save(flags); | 1114 | x86_pmu_stop(event); |
1734 | |||
1735 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
1736 | 1115 | ||
1737 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1116 | for (i = 0; i < cpuc->n_events; i++) { |
1738 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | 1117 | if (event == cpuc->event_list[i]) { |
1739 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
1740 | } | ||
1741 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | ||
1742 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
1743 | } | ||
1744 | if (ds) | ||
1745 | ds->bts_index = ds->bts_buffer_base; | ||
1746 | |||
1747 | local_irq_restore(flags); | ||
1748 | } | ||
1749 | 1118 | ||
1750 | static int p6_pmu_handle_irq(struct pt_regs *regs) | 1119 | if (x86_pmu.put_event_constraints) |
1751 | { | 1120 | x86_pmu.put_event_constraints(cpuc, event); |
1752 | struct perf_sample_data data; | ||
1753 | struct cpu_hw_events *cpuc; | ||
1754 | struct perf_event *event; | ||
1755 | struct hw_perf_event *hwc; | ||
1756 | int idx, handled = 0; | ||
1757 | u64 val; | ||
1758 | 1121 | ||
1759 | data.addr = 0; | 1122 | while (++i < cpuc->n_events) |
1760 | data.raw = NULL; | 1123 | cpuc->event_list[i-1] = cpuc->event_list[i]; |
1761 | 1124 | ||
1762 | cpuc = &__get_cpu_var(cpu_hw_events); | 1125 | --cpuc->n_events; |
1763 | 1126 | break; | |
1764 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1127 | } |
1765 | if (!test_bit(idx, cpuc->active_mask)) | ||
1766 | continue; | ||
1767 | |||
1768 | event = cpuc->events[idx]; | ||
1769 | hwc = &event->hw; | ||
1770 | |||
1771 | val = x86_perf_event_update(event, hwc, idx); | ||
1772 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | ||
1773 | continue; | ||
1774 | |||
1775 | /* | ||
1776 | * event overflow | ||
1777 | */ | ||
1778 | handled = 1; | ||
1779 | data.period = event->hw.last_period; | ||
1780 | |||
1781 | if (!x86_perf_event_set_period(event, hwc, idx)) | ||
1782 | continue; | ||
1783 | |||
1784 | if (perf_event_overflow(event, 1, &data, regs)) | ||
1785 | p6_pmu_disable_event(hwc, idx); | ||
1786 | } | ||
1787 | |||
1788 | if (handled) | ||
1789 | inc_irq_stat(apic_perf_irqs); | ||
1790 | |||
1791 | return handled; | ||
1792 | } | ||
1793 | |||
1794 | /* | ||
1795 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
1796 | * rules apply: | ||
1797 | */ | ||
1798 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
1799 | { | ||
1800 | struct perf_sample_data data; | ||
1801 | struct cpu_hw_events *cpuc; | ||
1802 | int bit, loops; | ||
1803 | u64 ack, status; | ||
1804 | |||
1805 | data.addr = 0; | ||
1806 | data.raw = NULL; | ||
1807 | |||
1808 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
1809 | |||
1810 | perf_disable(); | ||
1811 | intel_pmu_drain_bts_buffer(cpuc); | ||
1812 | status = intel_pmu_get_status(); | ||
1813 | if (!status) { | ||
1814 | perf_enable(); | ||
1815 | return 0; | ||
1816 | } | ||
1817 | |||
1818 | loops = 0; | ||
1819 | again: | ||
1820 | if (++loops > 100) { | ||
1821 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | ||
1822 | perf_event_print_debug(); | ||
1823 | intel_pmu_reset(); | ||
1824 | perf_enable(); | ||
1825 | return 1; | ||
1826 | } | ||
1827 | |||
1828 | inc_irq_stat(apic_perf_irqs); | ||
1829 | ack = status; | ||
1830 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
1831 | struct perf_event *event = cpuc->events[bit]; | ||
1832 | |||
1833 | clear_bit(bit, (unsigned long *) &status); | ||
1834 | if (!test_bit(bit, cpuc->active_mask)) | ||
1835 | continue; | ||
1836 | |||
1837 | if (!intel_pmu_save_and_restart(event)) | ||
1838 | continue; | ||
1839 | |||
1840 | data.period = event->hw.last_period; | ||
1841 | |||
1842 | if (perf_event_overflow(event, 1, &data, regs)) | ||
1843 | intel_pmu_disable_event(&event->hw, bit); | ||
1844 | } | 1128 | } |
1845 | 1129 | perf_event_update_userpage(event); | |
1846 | intel_pmu_ack_status(ack); | ||
1847 | |||
1848 | /* | ||
1849 | * Repeat if there is more work to be done: | ||
1850 | */ | ||
1851 | status = intel_pmu_get_status(); | ||
1852 | if (status) | ||
1853 | goto again; | ||
1854 | |||
1855 | perf_enable(); | ||
1856 | |||
1857 | return 1; | ||
1858 | } | 1130 | } |
1859 | 1131 | ||
1860 | static int amd_pmu_handle_irq(struct pt_regs *regs) | 1132 | static int x86_pmu_handle_irq(struct pt_regs *regs) |
1861 | { | 1133 | { |
1862 | struct perf_sample_data data; | 1134 | struct perf_sample_data data; |
1863 | struct cpu_hw_events *cpuc; | 1135 | struct cpu_hw_events *cpuc; |
@@ -1866,33 +1138,40 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) | |||
1866 | int idx, handled = 0; | 1138 | int idx, handled = 0; |
1867 | u64 val; | 1139 | u64 val; |
1868 | 1140 | ||
1869 | data.addr = 0; | 1141 | perf_sample_data_init(&data, 0); |
1870 | data.raw = NULL; | ||
1871 | 1142 | ||
1872 | cpuc = &__get_cpu_var(cpu_hw_events); | 1143 | cpuc = &__get_cpu_var(cpu_hw_events); |
1873 | 1144 | ||
1874 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1145 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1875 | if (!test_bit(idx, cpuc->active_mask)) | 1146 | if (!test_bit(idx, cpuc->active_mask)) { |
1147 | /* | ||
1148 | * Though we deactivated the counter, some cpus | ||
1149 | * might still deliver spurious interrupts that are | ||
1150 | * still in flight. Catch them: | ||
1151 | */ | ||
1152 | if (__test_and_clear_bit(idx, cpuc->running)) | ||
1153 | handled++; | ||
1876 | continue; | 1154 | continue; |
1155 | } | ||
1877 | 1156 | ||
1878 | event = cpuc->events[idx]; | 1157 | event = cpuc->events[idx]; |
1879 | hwc = &event->hw; | 1158 | hwc = &event->hw; |
1880 | 1159 | ||
1881 | val = x86_perf_event_update(event, hwc, idx); | 1160 | val = x86_perf_event_update(event); |
1882 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | 1161 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) |
1883 | continue; | 1162 | continue; |
1884 | 1163 | ||
1885 | /* | 1164 | /* |
1886 | * event overflow | 1165 | * event overflow |
1887 | */ | 1166 | */ |
1888 | handled = 1; | 1167 | handled++; |
1889 | data.period = event->hw.last_period; | 1168 | data.period = event->hw.last_period; |
1890 | 1169 | ||
1891 | if (!x86_perf_event_set_period(event, hwc, idx)) | 1170 | if (!x86_perf_event_set_period(event)) |
1892 | continue; | 1171 | continue; |
1893 | 1172 | ||
1894 | if (perf_event_overflow(event, 1, &data, regs)) | 1173 | if (perf_event_overflow(event, 1, &data, regs)) |
1895 | amd_pmu_disable_event(hwc, idx); | 1174 | x86_pmu_stop(event); |
1896 | } | 1175 | } |
1897 | 1176 | ||
1898 | if (handled) | 1177 | if (handled) |
@@ -1922,7 +1201,6 @@ void set_perf_event_pending(void) | |||
1922 | 1201 | ||
1923 | void perf_events_lapic_init(void) | 1202 | void perf_events_lapic_init(void) |
1924 | { | 1203 | { |
1925 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1926 | if (!x86_pmu.apic || !x86_pmu_initialized()) | 1204 | if (!x86_pmu.apic || !x86_pmu_initialized()) |
1927 | return; | 1205 | return; |
1928 | 1206 | ||
@@ -1930,15 +1208,22 @@ void perf_events_lapic_init(void) | |||
1930 | * Always use NMI for PMU | 1208 | * Always use NMI for PMU |
1931 | */ | 1209 | */ |
1932 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1210 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1933 | #endif | ||
1934 | } | 1211 | } |
1935 | 1212 | ||
1213 | struct pmu_nmi_state { | ||
1214 | unsigned int marked; | ||
1215 | int handled; | ||
1216 | }; | ||
1217 | |||
1218 | static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi); | ||
1219 | |||
1936 | static int __kprobes | 1220 | static int __kprobes |
1937 | perf_event_nmi_handler(struct notifier_block *self, | 1221 | perf_event_nmi_handler(struct notifier_block *self, |
1938 | unsigned long cmd, void *__args) | 1222 | unsigned long cmd, void *__args) |
1939 | { | 1223 | { |
1940 | struct die_args *args = __args; | 1224 | struct die_args *args = __args; |
1941 | struct pt_regs *regs; | 1225 | unsigned int this_nmi; |
1226 | int handled; | ||
1942 | 1227 | ||
1943 | if (!atomic_read(&active_events)) | 1228 | if (!atomic_read(&active_events)) |
1944 | return NOTIFY_DONE; | 1229 | return NOTIFY_DONE; |
@@ -1947,24 +1232,47 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
1947 | case DIE_NMI: | 1232 | case DIE_NMI: |
1948 | case DIE_NMI_IPI: | 1233 | case DIE_NMI_IPI: |
1949 | break; | 1234 | break; |
1950 | 1235 | case DIE_NMIUNKNOWN: | |
1236 | this_nmi = percpu_read(irq_stat.__nmi_count); | ||
1237 | if (this_nmi != __get_cpu_var(pmu_nmi).marked) | ||
1238 | /* let the kernel handle the unknown nmi */ | ||
1239 | return NOTIFY_DONE; | ||
1240 | /* | ||
1241 | * This one is a PMU back-to-back nmi. Two events | ||
1242 | * trigger 'simultaneously' raising two back-to-back | ||
1243 | * NMIs. If the first NMI handles both, the latter | ||
1244 | * will be empty and daze the CPU. So, we drop it to | ||
1245 | * avoid false-positive 'unknown nmi' messages. | ||
1246 | */ | ||
1247 | return NOTIFY_STOP; | ||
1951 | default: | 1248 | default: |
1952 | return NOTIFY_DONE; | 1249 | return NOTIFY_DONE; |
1953 | } | 1250 | } |
1954 | 1251 | ||
1955 | regs = args->regs; | ||
1956 | |||
1957 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1958 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1252 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1959 | #endif | 1253 | |
1960 | /* | 1254 | handled = x86_pmu.handle_irq(args->regs); |
1961 | * Can't rely on the handled return value to say it was our NMI, two | 1255 | if (!handled) |
1962 | * events could trigger 'simultaneously' raising two back-to-back NMIs. | 1256 | return NOTIFY_DONE; |
1963 | * | 1257 | |
1964 | * If the first NMI handles both, the latter will be empty and daze | 1258 | this_nmi = percpu_read(irq_stat.__nmi_count); |
1965 | * the CPU. | 1259 | if ((handled > 1) || |
1966 | */ | 1260 | /* the next nmi could be a back-to-back nmi */ |
1967 | x86_pmu.handle_irq(regs); | 1261 | ((__get_cpu_var(pmu_nmi).marked == this_nmi) && |
1262 | (__get_cpu_var(pmu_nmi).handled > 1))) { | ||
1263 | /* | ||
1264 | * We could have two subsequent back-to-back nmis: The | ||
1265 | * first handles more than one counter, the 2nd | ||
1266 | * handles only one counter and the 3rd handles no | ||
1267 | * counter. | ||
1268 | * | ||
1269 | * This is the 2nd nmi because the previous was | ||
1270 | * handling more than one counter. We will mark the | ||
1271 | * next (3rd) and then drop it if unhandled. | ||
1272 | */ | ||
1273 | __get_cpu_var(pmu_nmi).marked = this_nmi + 1; | ||
1274 | __get_cpu_var(pmu_nmi).handled = handled; | ||
1275 | } | ||
1968 | 1276 | ||
1969 | return NOTIFY_STOP; | 1277 | return NOTIFY_STOP; |
1970 | } | 1278 | } |
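The pmu_nmi bookkeeping above is the whole back-to-back story: remember at which NMI count an empty follow-up NMI may arrive, and how many counters the previous NMI handled, then silently swallow exactly that one "unknown" NMI. A stand-alone sketch of the same state machine, with NMI arrival modelled as plain function calls (the simulation and its numbers are illustrative only, not kernel API):

        #include <stdio.h>

        /* Mirrors the per-CPU bookkeeping: which NMI number a back-to-back
         * PMU NMI is expected at, and how many counters the last one handled. */
        struct pmu_nmi_state {
                unsigned int marked;
                int handled;
        };

        static struct pmu_nmi_state pmu_nmi;

        /* An NMI the PMU claimed: decide whether to mark the next one. */
        static void pmu_nmi_handled(unsigned int this_nmi, int handled)
        {
                if (handled > 1 ||
                    (pmu_nmi.marked == this_nmi && pmu_nmi.handled > 1)) {
                        /* the next NMI may be an empty back-to-back one */
                        pmu_nmi.marked = this_nmi + 1;
                        pmu_nmi.handled = handled;
                }
        }

        /* An NMI nobody claimed: swallow it only if it was marked. */
        static int pmu_nmi_unknown(unsigned int this_nmi)
        {
                return this_nmi == pmu_nmi.marked;  /* 1: drop, 0: report */
        }

        int main(void)
        {
                /* NMI #10 handles two counters; #11 arrives empty and is dropped. */
                pmu_nmi_handled(10, 2);
                printf("NMI 11 dropped: %d\n", pmu_nmi_unknown(11));  /* 1 */
                printf("NMI 12 dropped: %d\n", pmu_nmi_unknown(12));  /* 0 */
                return 0;
        }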
@@ -1975,193 +1283,64 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { | |||
1975 | .priority = 1 | 1283 | .priority = 1 |
1976 | }; | 1284 | }; |
1977 | 1285 | ||
1978 | static __initconst struct x86_pmu p6_pmu = { | 1286 | static struct event_constraint unconstrained; |
1979 | .name = "p6", | 1287 | static struct event_constraint emptyconstraint; |
1980 | .handle_irq = p6_pmu_handle_irq, | ||
1981 | .disable_all = p6_pmu_disable_all, | ||
1982 | .enable_all = p6_pmu_enable_all, | ||
1983 | .enable = p6_pmu_enable_event, | ||
1984 | .disable = p6_pmu_disable_event, | ||
1985 | .eventsel = MSR_P6_EVNTSEL0, | ||
1986 | .perfctr = MSR_P6_PERFCTR0, | ||
1987 | .event_map = p6_pmu_event_map, | ||
1988 | .raw_event = p6_pmu_raw_event, | ||
1989 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
1990 | .apic = 1, | ||
1991 | .max_period = (1ULL << 31) - 1, | ||
1992 | .version = 0, | ||
1993 | .num_events = 2, | ||
1994 | /* | ||
1995 | * Events have 40 bits implemented. However they are designed such | ||
1996 | * that bits [32-39] are sign extensions of bit 31. As such the | ||
1997 | * effective width of an event for a P6-like PMU is 32 bits only. | ||
1998 | * | ||
1999 | * See IA-32 Intel Architecture Software developer manual Vol 3B | ||
2000 | */ | ||
2001 | .event_bits = 32, | ||
2002 | .event_mask = (1ULL << 32) - 1, | ||
2003 | .get_event_idx = intel_get_event_idx, | ||
2004 | }; | ||
2005 | 1288 | ||
2006 | static __initconst struct x86_pmu intel_pmu = { | 1289 | static struct event_constraint * |
2007 | .name = "Intel", | 1290 | x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) |
2008 | .handle_irq = intel_pmu_handle_irq, | ||
2009 | .disable_all = intel_pmu_disable_all, | ||
2010 | .enable_all = intel_pmu_enable_all, | ||
2011 | .enable = intel_pmu_enable_event, | ||
2012 | .disable = intel_pmu_disable_event, | ||
2013 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
2014 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
2015 | .event_map = intel_pmu_event_map, | ||
2016 | .raw_event = intel_pmu_raw_event, | ||
2017 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
2018 | .apic = 1, | ||
2019 | /* | ||
2020 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
2021 | * so we install an artificial 1<<31 period regardless of | ||
2022 | * the generic event period: | ||
2023 | */ | ||
2024 | .max_period = (1ULL << 31) - 1, | ||
2025 | .enable_bts = intel_pmu_enable_bts, | ||
2026 | .disable_bts = intel_pmu_disable_bts, | ||
2027 | .get_event_idx = intel_get_event_idx, | ||
2028 | }; | ||
2029 | |||
2030 | static __initconst struct x86_pmu amd_pmu = { | ||
2031 | .name = "AMD", | ||
2032 | .handle_irq = amd_pmu_handle_irq, | ||
2033 | .disable_all = amd_pmu_disable_all, | ||
2034 | .enable_all = amd_pmu_enable_all, | ||
2035 | .enable = amd_pmu_enable_event, | ||
2036 | .disable = amd_pmu_disable_event, | ||
2037 | .eventsel = MSR_K7_EVNTSEL0, | ||
2038 | .perfctr = MSR_K7_PERFCTR0, | ||
2039 | .event_map = amd_pmu_event_map, | ||
2040 | .raw_event = amd_pmu_raw_event, | ||
2041 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
2042 | .num_events = 4, | ||
2043 | .event_bits = 48, | ||
2044 | .event_mask = (1ULL << 48) - 1, | ||
2045 | .apic = 1, | ||
2046 | /* use highest bit to detect overflow */ | ||
2047 | .max_period = (1ULL << 47) - 1, | ||
2048 | .get_event_idx = gen_get_event_idx, | ||
2049 | }; | ||
2050 | |||
2051 | static __init int p6_pmu_init(void) | ||
2052 | { | 1291 | { |
2053 | switch (boot_cpu_data.x86_model) { | 1292 | struct event_constraint *c; |
2054 | case 1: | ||
2055 | case 3: /* Pentium Pro */ | ||
2056 | case 5: | ||
2057 | case 6: /* Pentium II */ | ||
2058 | case 7: | ||
2059 | case 8: | ||
2060 | case 11: /* Pentium III */ | ||
2061 | event_constraints = intel_p6_event_constraints; | ||
2062 | break; | ||
2063 | case 9: | ||
2064 | case 13: | ||
2065 | /* Pentium M */ | ||
2066 | event_constraints = intel_p6_event_constraints; | ||
2067 | break; | ||
2068 | default: | ||
2069 | pr_cont("unsupported p6 CPU model %d ", | ||
2070 | boot_cpu_data.x86_model); | ||
2071 | return -ENODEV; | ||
2072 | } | ||
2073 | 1293 | ||
2074 | x86_pmu = p6_pmu; | 1294 | if (x86_pmu.event_constraints) { |
1295 | for_each_event_constraint(c, x86_pmu.event_constraints) { | ||
1296 | if ((event->hw.config & c->cmask) == c->code) | ||
1297 | return c; | ||
1298 | } | ||
1299 | } | ||
2075 | 1300 | ||
2076 | return 0; | 1301 | return &unconstrained; |
2077 | } | 1302 | } |
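x86_get_event_constraints() above is a plain table walk: the first constraint whose masked config matches wins, otherwise the event is unconstrained and may use any generic counter. A small stand-alone sketch of that lookup; the two table entries are invented for the example, not real constraint data:

        #include <stdio.h>
        #include <stdint.h>

        struct event_constraint {
                uint64_t code;    /* event code the constraint applies to */
                uint64_t cmask;   /* which config bits take part in the match */
                uint64_t idxmsk;  /* counters the event may use */
        };

        /* hypothetical table; a zero cmask terminates it, like EVENT_CONSTRAINT_END */
        static const struct event_constraint constraints[] = {
                { 0x11, 0xff, 0x2 },    /* event 0x11 only on counter 1 */
                { 0x12, 0xff, 0x2 },    /* event 0x12 only on counter 1 */
                { 0, 0, 0 },
        };

        static uint64_t get_idxmsk(uint64_t config, uint64_t unconstrained)
        {
                const struct event_constraint *c;

                for (c = constraints; c->cmask; c++)
                        if ((config & c->cmask) == c->code)
                                return c->idxmsk;
                return unconstrained;   /* any generic counter */
        }

        int main(void)
        {
                printf("0x11 -> %#llx\n", (unsigned long long)get_idxmsk(0x11, 0xf));
                printf("0x76 -> %#llx\n", (unsigned long long)get_idxmsk(0x76, 0xf));
                return 0;
        }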
2078 | 1303 | ||
2079 | static __init int intel_pmu_init(void) | 1304 | #include "perf_event_amd.c" |
2080 | { | 1305 | #include "perf_event_p6.c" |
2081 | union cpuid10_edx edx; | 1306 | #include "perf_event_p4.c" |
2082 | union cpuid10_eax eax; | 1307 | #include "perf_event_intel_lbr.c" |
2083 | unsigned int unused; | 1308 | #include "perf_event_intel_ds.c" |
2084 | unsigned int ebx; | 1309 | #include "perf_event_intel.c" |
2085 | int version; | ||
2086 | |||
2087 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
2088 | /* check for P6 processor family */ | ||
2089 | if (boot_cpu_data.x86 == 6) { | ||
2090 | return p6_pmu_init(); | ||
2091 | } else { | ||
2092 | return -ENODEV; | ||
2093 | } | ||
2094 | } | ||
2095 | |||
2096 | /* | ||
2097 | * Check whether the Architectural PerfMon supports | ||
2098 | * Branch Misses Retired hw_event or not. | ||
2099 | */ | ||
2100 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
2101 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
2102 | return -ENODEV; | ||
2103 | 1310 | ||
2104 | version = eax.split.version_id; | 1311 | static int __cpuinit |
2105 | if (version < 2) | 1312 | x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) |
2106 | return -ENODEV; | 1313 | { |
1314 | unsigned int cpu = (long)hcpu; | ||
1315 | int ret = NOTIFY_OK; | ||
2107 | 1316 | ||
2108 | x86_pmu = intel_pmu; | 1317 | switch (action & ~CPU_TASKS_FROZEN) { |
2109 | x86_pmu.version = version; | 1318 | case CPU_UP_PREPARE: |
2110 | x86_pmu.num_events = eax.split.num_events; | 1319 | if (x86_pmu.cpu_prepare) |
2111 | x86_pmu.event_bits = eax.split.bit_width; | 1320 | ret = x86_pmu.cpu_prepare(cpu); |
2112 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | 1321 | break; |
2113 | 1322 | ||
2114 | /* | 1323 | case CPU_STARTING: |
2115 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 1324 | if (x86_pmu.cpu_starting) |
2116 | * assume at least 3 events: | 1325 | x86_pmu.cpu_starting(cpu); |
2117 | */ | 1326 | break; |
2118 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | ||
2119 | 1327 | ||
2120 | /* | 1328 | case CPU_DYING: |
2121 | * Install the hw-cache-events table: | 1329 | if (x86_pmu.cpu_dying) |
2122 | */ | 1330 | x86_pmu.cpu_dying(cpu); |
2123 | switch (boot_cpu_data.x86_model) { | ||
2124 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
2125 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
2126 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
2127 | case 29: /* six-core 45 nm xeon "Dunnington" */ | ||
2128 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | ||
2129 | sizeof(hw_cache_event_ids)); | ||
2130 | |||
2131 | pr_cont("Core2 events, "); | ||
2132 | event_constraints = intel_core_event_constraints; | ||
2133 | break; | 1331 | break; |
2134 | default: | ||
2135 | case 26: | ||
2136 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | ||
2137 | sizeof(hw_cache_event_ids)); | ||
2138 | 1332 | ||
2139 | event_constraints = intel_nehalem_event_constraints; | 1333 | case CPU_UP_CANCELED: |
2140 | pr_cont("Nehalem/Corei7 events, "); | 1334 | case CPU_DEAD: |
1335 | if (x86_pmu.cpu_dead) | ||
1336 | x86_pmu.cpu_dead(cpu); | ||
2141 | break; | 1337 | break; |
2142 | case 28: | ||
2143 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
2144 | sizeof(hw_cache_event_ids)); | ||
2145 | 1338 | ||
2146 | pr_cont("Atom events, "); | 1339 | default: |
2147 | break; | 1340 | break; |
2148 | } | 1341 | } |
2149 | return 0; | ||
2150 | } | ||
2151 | 1342 | ||
2152 | static __init int amd_pmu_init(void) | 1343 | return ret; |
2153 | { | ||
2154 | /* Performance-monitoring supported from K7 and later: */ | ||
2155 | if (boot_cpu_data.x86 < 6) | ||
2156 | return -ENODEV; | ||
2157 | |||
2158 | x86_pmu = amd_pmu; | ||
2159 | |||
2160 | /* Events are common for all AMDs */ | ||
2161 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | ||
2162 | sizeof(hw_cache_event_ids)); | ||
2163 | |||
2164 | return 0; | ||
2165 | } | 1344 | } |
2166 | 1345 | ||
2167 | static void __init pmu_check_apic(void) | 1346 | static void __init pmu_check_apic(void) |
@@ -2176,6 +1355,7 @@ static void __init pmu_check_apic(void) | |||
2176 | 1355 | ||
2177 | void __init init_hw_perf_events(void) | 1356 | void __init init_hw_perf_events(void) |
2178 | { | 1357 | { |
1358 | struct event_constraint *c; | ||
2179 | int err; | 1359 | int err; |
2180 | 1360 | ||
2181 | pr_info("Performance Events: "); | 1361 | pr_info("Performance Events: "); |
@@ -2199,88 +1379,227 @@ void __init init_hw_perf_events(void) | |||
2199 | 1379 | ||
2200 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1380 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
2201 | 1381 | ||
2202 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { | 1382 | if (x86_pmu.quirks) |
1383 | x86_pmu.quirks(); | ||
1384 | |||
1385 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | ||
2203 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1386 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
2204 | x86_pmu.num_events, X86_PMC_MAX_GENERIC); | 1387 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); |
2205 | x86_pmu.num_events = X86_PMC_MAX_GENERIC; | 1388 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; |
2206 | } | 1389 | } |
2207 | perf_event_mask = (1 << x86_pmu.num_events) - 1; | 1390 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
2208 | perf_max_events = x86_pmu.num_events; | 1391 | perf_max_events = x86_pmu.num_counters; |
2209 | 1392 | ||
2210 | if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { | 1393 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { |
2211 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | 1394 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", |
2212 | x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); | 1395 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); |
2213 | x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; | 1396 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; |
2214 | } | 1397 | } |
2215 | 1398 | ||
2216 | perf_event_mask |= | 1399 | x86_pmu.intel_ctrl |= |
2217 | ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; | 1400 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; |
2218 | x86_pmu.intel_ctrl = perf_event_mask; | ||
2219 | 1401 | ||
2220 | perf_events_lapic_init(); | 1402 | perf_events_lapic_init(); |
2221 | register_die_notifier(&perf_event_nmi_notifier); | 1403 | register_die_notifier(&perf_event_nmi_notifier); |
2222 | 1404 | ||
1405 | unconstrained = (struct event_constraint) | ||
1406 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, | ||
1407 | 0, x86_pmu.num_counters); | ||
1408 | |||
1409 | if (x86_pmu.event_constraints) { | ||
1410 | for_each_event_constraint(c, x86_pmu.event_constraints) { | ||
1411 | if (c->cmask != X86_RAW_EVENT_MASK) | ||
1412 | continue; | ||
1413 | |||
1414 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; | ||
1415 | c->weight += x86_pmu.num_counters; | ||
1416 | } | ||
1417 | } | ||
1418 | |||
2223 | pr_info("... version: %d\n", x86_pmu.version); | 1419 | pr_info("... version: %d\n", x86_pmu.version); |
2224 | pr_info("... bit width: %d\n", x86_pmu.event_bits); | 1420 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
2225 | pr_info("... generic registers: %d\n", x86_pmu.num_events); | 1421 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
2226 | pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); | 1422 | pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); |
2227 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | 1423 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); |
2228 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); | 1424 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); |
2229 | pr_info("... event mask: %016Lx\n", perf_event_mask); | 1425 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); |
1426 | |||
1427 | perf_cpu_notifier(x86_pmu_notifier); | ||
2230 | } | 1428 | } |
2231 | 1429 | ||
2232 | static inline void x86_pmu_read(struct perf_event *event) | 1430 | static inline void x86_pmu_read(struct perf_event *event) |
2233 | { | 1431 | { |
2234 | x86_perf_event_update(event, &event->hw, event->hw.idx); | 1432 | x86_perf_event_update(event); |
1433 | } | ||
1434 | |||
1435 | /* | ||
1436 | * Start group events scheduling transaction | ||
1437 | * Set the flag to make pmu::enable() not perform the | ||
1438 | * schedulability test; it will be performed at commit time | ||
1439 | */ | ||
1440 | static void x86_pmu_start_txn(const struct pmu *pmu) | ||
1441 | { | ||
1442 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1443 | |||
1444 | cpuc->group_flag |= PERF_EVENT_TXN; | ||
1445 | cpuc->n_txn = 0; | ||
1446 | } | ||
1447 | |||
1448 | /* | ||
1449 | * Stop group events scheduling transaction | ||
1450 | * Clear the flag and pmu::enable() will perform the | ||
1451 | * schedulability test. | ||
1452 | */ | ||
1453 | static void x86_pmu_cancel_txn(const struct pmu *pmu) | ||
1454 | { | ||
1455 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1456 | |||
1457 | cpuc->group_flag &= ~PERF_EVENT_TXN; | ||
1458 | /* | ||
1459 | * Truncate the collected events. | ||
1460 | */ | ||
1461 | cpuc->n_added -= cpuc->n_txn; | ||
1462 | cpuc->n_events -= cpuc->n_txn; | ||
1463 | } | ||
1464 | |||
1465 | /* | ||
1466 | * Commit group events scheduling transaction | ||
1467 | * Perform the group schedulability test as a whole | ||
1468 | * Return 0 on success | ||
1469 | */ | ||
1470 | static int x86_pmu_commit_txn(const struct pmu *pmu) | ||
1471 | { | ||
1472 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1473 | int assign[X86_PMC_IDX_MAX]; | ||
1474 | int n, ret; | ||
1475 | |||
1476 | n = cpuc->n_events; | ||
1477 | |||
1478 | if (!x86_pmu_initialized()) | ||
1479 | return -EAGAIN; | ||
1480 | |||
1481 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
1482 | if (ret) | ||
1483 | return ret; | ||
1484 | |||
1485 | /* | ||
1486 | * copy the new assignment; now that we know it is possible | ||
1487 | * it will be used by hw_perf_enable() | ||
1488 | */ | ||
1489 | memcpy(cpuc->assign, assign, n*sizeof(int)); | ||
1490 | |||
1491 | cpuc->group_flag &= ~PERF_EVENT_TXN; | ||
1492 | |||
1493 | return 0; | ||
2235 | } | 1494 | } |
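The start_txn/cancel_txn/commit_txn trio above exists so the core can add a whole group with a single schedulability test at commit time instead of one test per event, and can cheaply back out if the group does not fit. A toy user-space model of that flow, assuming a caller that adds five events to a four-counter PMU (all names here are stand-ins for illustration, not the kernel API):

        #include <stdio.h>

        #define MAX_EVENTS 4

        /* Toy model: events are only collected while the txn flag is set;
         * the schedulability test runs once, at commit time. */
        struct cpu_hw {
                int n_events, n_txn;
                int txn;                        /* PERF_EVENT_TXN */
                int event_list[MAX_EVENTS * 2];
        };

        static void start_txn(struct cpu_hw *c)  { c->txn = 1; c->n_txn = 0; }

        static void cancel_txn(struct cpu_hw *c)
        {
                c->txn = 0;
                c->n_events -= c->n_txn;        /* truncate what the txn collected */
        }

        static int collect(struct cpu_hw *c, int ev)
        {
                c->event_list[c->n_events++] = ev;
                c->n_txn++;
                return 0;                       /* no per-event test inside a txn */
        }

        static int commit_txn(struct cpu_hw *c)
        {
                if (c->n_events > MAX_EVENTS)   /* stand-in schedulability test */
                        return -1;
                c->txn = 0;
                return 0;
        }

        int main(void)
        {
                struct cpu_hw c = { 0 };

                start_txn(&c);
                for (int ev = 0; ev < 5; ev++)  /* five events, only four counters */
                        collect(&c, ev);
                if (commit_txn(&c)) {
                        cancel_txn(&c);
                        printf("group rejected, %d events left\n", c.n_events);
                }
                return 0;
        }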
2236 | 1495 | ||
2237 | static const struct pmu pmu = { | 1496 | static const struct pmu pmu = { |
2238 | .enable = x86_pmu_enable, | 1497 | .enable = x86_pmu_enable, |
2239 | .disable = x86_pmu_disable, | 1498 | .disable = x86_pmu_disable, |
1499 | .start = x86_pmu_start, | ||
1500 | .stop = x86_pmu_stop, | ||
2240 | .read = x86_pmu_read, | 1501 | .read = x86_pmu_read, |
2241 | .unthrottle = x86_pmu_unthrottle, | 1502 | .unthrottle = x86_pmu_unthrottle, |
1503 | .start_txn = x86_pmu_start_txn, | ||
1504 | .cancel_txn = x86_pmu_cancel_txn, | ||
1505 | .commit_txn = x86_pmu_commit_txn, | ||
2242 | }; | 1506 | }; |
2243 | 1507 | ||
2244 | static int | 1508 | /* |
2245 | validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) | 1509 | * validate that we can schedule this event |
1510 | */ | ||
1511 | static int validate_event(struct perf_event *event) | ||
2246 | { | 1512 | { |
2247 | struct hw_perf_event fake_event = event->hw; | 1513 | struct cpu_hw_events *fake_cpuc; |
1514 | struct event_constraint *c; | ||
1515 | int ret = 0; | ||
2248 | 1516 | ||
2249 | if (event->pmu && event->pmu != &pmu) | 1517 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); |
2250 | return 0; | 1518 | if (!fake_cpuc) |
1519 | return -ENOMEM; | ||
1520 | |||
1521 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | ||
1522 | |||
1523 | if (!c || !c->weight) | ||
1524 | ret = -ENOSPC; | ||
1525 | |||
1526 | if (x86_pmu.put_event_constraints) | ||
1527 | x86_pmu.put_event_constraints(fake_cpuc, event); | ||
1528 | |||
1529 | kfree(fake_cpuc); | ||
2251 | 1530 | ||
2252 | return x86_schedule_event(cpuc, &fake_event) >= 0; | 1531 | return ret; |
2253 | } | 1532 | } |
2254 | 1533 | ||
1534 | /* | ||
1535 | * validate a single event group | ||
1536 | * | ||
1537 | * validation includes: | ||
1538 | * - check events are compatible with each other | ||
1539 | * - events do not compete for the same counter | ||
1540 | * - number of events <= number of counters | ||
1541 | * | ||
1542 | * validation ensures the group can be loaded onto the | ||
1543 | * PMU if it was the only group available. | ||
1544 | */ | ||
2255 | static int validate_group(struct perf_event *event) | 1545 | static int validate_group(struct perf_event *event) |
2256 | { | 1546 | { |
2257 | struct perf_event *sibling, *leader = event->group_leader; | 1547 | struct perf_event *leader = event->group_leader; |
2258 | struct cpu_hw_events fake_pmu; | 1548 | struct cpu_hw_events *fake_cpuc; |
1549 | int ret, n; | ||
2259 | 1550 | ||
2260 | memset(&fake_pmu, 0, sizeof(fake_pmu)); | 1551 | ret = -ENOMEM; |
1552 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
1553 | if (!fake_cpuc) | ||
1554 | goto out; | ||
2261 | 1555 | ||
2262 | if (!validate_event(&fake_pmu, leader)) | 1556 | /* |
2263 | return -ENOSPC; | 1557 | * the event is not yet connected with its |
1558 | * siblings therefore we must first collect | ||
1559 | * existing siblings, then add the new event | ||
1560 | * before we can simulate the scheduling | ||
1561 | */ | ||
1562 | ret = -ENOSPC; | ||
1563 | n = collect_events(fake_cpuc, leader, true); | ||
1564 | if (n < 0) | ||
1565 | goto out_free; | ||
2264 | 1566 | ||
2265 | list_for_each_entry(sibling, &leader->sibling_list, group_entry) { | 1567 | fake_cpuc->n_events = n; |
2266 | if (!validate_event(&fake_pmu, sibling)) | 1568 | n = collect_events(fake_cpuc, event, false); |
2267 | return -ENOSPC; | 1569 | if (n < 0) |
2268 | } | 1570 | goto out_free; |
2269 | 1571 | ||
2270 | if (!validate_event(&fake_pmu, event)) | 1572 | fake_cpuc->n_events = n; |
2271 | return -ENOSPC; | ||
2272 | 1573 | ||
2273 | return 0; | 1574 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); |
1575 | |||
1576 | out_free: | ||
1577 | kfree(fake_cpuc); | ||
1578 | out: | ||
1579 | return ret; | ||
2274 | } | 1580 | } |
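validate_group() above never touches real counters; it only asks whether the group could be scheduled if it were alone on the PMU, by collecting the events into a throw-away cpu_hw_events and running the scheduler with a NULL assignment. A much-simplified dry run of that idea, using a greedy first-fit over per-event counter masks (the real x86_schedule_events() is more thorough; this simplification is mine, not the kernel's):

        #include <stdio.h>
        #include <stdint.h>

        /* Return 0 if every event can get a distinct counter from its mask.
         * Greedy first-fit; the kernel's scheduler handles harder cases. */
        static int dry_run_schedule(const uint64_t *idxmsk, int n, int ncounters)
        {
                uint64_t used = 0;

                for (int i = 0; i < n; i++) {
                        int idx;

                        for (idx = 0; idx < ncounters; idx++) {
                                if ((idxmsk[i] & (1ULL << idx)) &&
                                    !(used & (1ULL << idx))) {
                                        used |= 1ULL << idx;
                                        break;
                                }
                        }
                        if (idx == ncounters)
                                return -1;      /* no counter left for event i */
                }
                return 0;
        }

        int main(void)
        {
                /* two events that both insist on counter 0: the group is rejected */
                uint64_t masks[] = { 0x1, 0x1 };

                printf("%s\n", dry_run_schedule(masks, 2, 4) ? "reject" : "ok");
                return 0;
        }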
2275 | 1581 | ||
2276 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 1582 | const struct pmu *hw_perf_event_init(struct perf_event *event) |
2277 | { | 1583 | { |
1584 | const struct pmu *tmp; | ||
2278 | int err; | 1585 | int err; |
2279 | 1586 | ||
2280 | err = __hw_perf_event_init(event); | 1587 | err = __hw_perf_event_init(event); |
2281 | if (!err) { | 1588 | if (!err) { |
1589 | /* | ||
1590 | * we temporarily connect event to its pmu | ||
1591 | * such that validate_group() can classify | ||
1592 | * it as an x86 event using is_x86_event() | ||
1593 | */ | ||
1594 | tmp = event->pmu; | ||
1595 | event->pmu = &pmu; | ||
1596 | |||
2282 | if (event->group_leader != event) | 1597 | if (event->group_leader != event) |
2283 | err = validate_group(event); | 1598 | err = validate_group(event); |
1599 | else | ||
1600 | err = validate_event(event); | ||
1601 | |||
1602 | event->pmu = tmp; | ||
2284 | } | 1603 | } |
2285 | if (err) { | 1604 | if (err) { |
2286 | if (event->destroy) | 1605 | if (event->destroy) |
@@ -2304,7 +1623,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) | |||
2304 | 1623 | ||
2305 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); | 1624 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); |
2306 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); | 1625 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); |
2307 | static DEFINE_PER_CPU(int, in_ignored_frame); | ||
2308 | 1626 | ||
2309 | 1627 | ||
2310 | static void | 1628 | static void |
@@ -2320,10 +1638,6 @@ static void backtrace_warning(void *data, char *msg) | |||
2320 | 1638 | ||
2321 | static int backtrace_stack(void *data, char *name) | 1639 | static int backtrace_stack(void *data, char *name) |
2322 | { | 1640 | { |
2323 | per_cpu(in_ignored_frame, smp_processor_id()) = | ||
2324 | x86_is_stack_id(NMI_STACK, name) || | ||
2325 | x86_is_stack_id(DEBUG_STACK, name); | ||
2326 | |||
2327 | return 0; | 1641 | return 0; |
2328 | } | 1642 | } |
2329 | 1643 | ||
@@ -2331,11 +1645,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
2331 | { | 1645 | { |
2332 | struct perf_callchain_entry *entry = data; | 1646 | struct perf_callchain_entry *entry = data; |
2333 | 1647 | ||
2334 | if (per_cpu(in_ignored_frame, smp_processor_id())) | 1648 | callchain_store(entry, addr); |
2335 | return; | ||
2336 | |||
2337 | if (reliable) | ||
2338 | callchain_store(entry, addr); | ||
2339 | } | 1649 | } |
2340 | 1650 | ||
2341 | static const struct stacktrace_ops backtrace_ops = { | 1651 | static const struct stacktrace_ops backtrace_ops = { |
@@ -2346,8 +1656,6 @@ static const struct stacktrace_ops backtrace_ops = { | |||
2346 | .walk_stack = print_context_stack_bp, | 1656 | .walk_stack = print_context_stack_bp, |
2347 | }; | 1657 | }; |
2348 | 1658 | ||
2349 | #include "../dumpstack.h" | ||
2350 | |||
2351 | static void | 1659 | static void |
2352 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1660 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) |
2353 | { | 1661 | { |
@@ -2357,49 +1665,42 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
2357 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1665 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
2358 | } | 1666 | } |
2359 | 1667 | ||
2360 | /* | 1668 | #ifdef CONFIG_COMPAT |
2361 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | 1669 | static inline int |
2362 | */ | 1670 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) |
2363 | static unsigned long | ||
2364 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
2365 | { | 1671 | { |
2366 | unsigned long offset, addr = (unsigned long)from; | 1672 | /* 32-bit process in 64-bit kernel. */ |
2367 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | 1673 | struct stack_frame_ia32 frame; |
2368 | unsigned long size, len = 0; | 1674 | const void __user *fp; |
2369 | struct page *page; | ||
2370 | void *map; | ||
2371 | int ret; | ||
2372 | |||
2373 | do { | ||
2374 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
2375 | if (!ret) | ||
2376 | break; | ||
2377 | 1675 | ||
2378 | offset = addr & (PAGE_SIZE - 1); | 1676 | if (!test_thread_flag(TIF_IA32)) |
2379 | size = min(PAGE_SIZE - offset, n - len); | 1677 | return 0; |
2380 | 1678 | ||
2381 | map = kmap_atomic(page, type); | 1679 | fp = compat_ptr(regs->bp); |
2382 | memcpy(to, map+offset, size); | 1680 | while (entry->nr < PERF_MAX_STACK_DEPTH) { |
2383 | kunmap_atomic(map, type); | 1681 | unsigned long bytes; |
2384 | put_page(page); | 1682 | frame.next_frame = 0; |
1683 | frame.return_address = 0; | ||
2385 | 1684 | ||
2386 | len += size; | 1685 | bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); |
2387 | to += size; | 1686 | if (bytes != sizeof(frame)) |
2388 | addr += size; | 1687 | break; |
2389 | 1688 | ||
2390 | } while (len < n); | 1689 | if (fp < compat_ptr(regs->sp)) |
1690 | break; | ||
2391 | 1691 | ||
2392 | return len; | 1692 | callchain_store(entry, frame.return_address); |
1693 | fp = compat_ptr(frame.next_frame); | ||
1694 | } | ||
1695 | return 1; | ||
2393 | } | 1696 | } |
2394 | 1697 | #else | |
2395 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | 1698 | static inline int |
1699 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
2396 | { | 1700 | { |
2397 | unsigned long bytes; | 1701 | return 0; |
2398 | |||
2399 | bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); | ||
2400 | |||
2401 | return bytes == sizeof(*frame); | ||
2402 | } | 1702 | } |
1703 | #endif | ||
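The compat walker above and the 64-bit walker below follow the same classic frame-pointer chain: each stack frame stores the caller's frame pointer and the return address, and the walk stops when the chain no longer moves up the stack or the depth limit is hit. A best-effort user-space sketch of that walk on the current process; it assumes the binary keeps frame pointers (build with -O0 or -fno-omit-frame-pointer), and the struct mirrors the kernel's stack_frame only by convention:

        #include <stdio.h>

        struct stack_frame {
                struct stack_frame *next_frame;   /* saved frame pointer of the caller */
                unsigned long return_address;
        };

        /* Walk our own frame-pointer chain, depth-limited like the kernel walkers. */
        static void __attribute__((noinline)) dump_callchain(void)
        {
                struct stack_frame *fp = __builtin_frame_address(0);
                int depth = 0;

                while (fp && depth++ < 4) {     /* 4 known-good frames: here, b, a, main */
                        printf("return address: %#lx\n", fp->return_address);
                        if (fp->next_frame <= fp)       /* chain must move up the stack */
                                break;
                        fp = fp->next_frame;
                }
        }

        static void __attribute__((noinline)) b(void) { dump_callchain(); }
        static void __attribute__((noinline)) a(void) { b(); }

        int main(void)
        {
                a();
                return 0;
        }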
2403 | 1704 | ||
2404 | static void | 1705 | static void |
2405 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1706 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) |
@@ -2415,11 +1716,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
2415 | callchain_store(entry, PERF_CONTEXT_USER); | 1716 | callchain_store(entry, PERF_CONTEXT_USER); |
2416 | callchain_store(entry, regs->ip); | 1717 | callchain_store(entry, regs->ip); |
2417 | 1718 | ||
1719 | if (perf_callchain_user32(regs, entry)) | ||
1720 | return; | ||
1721 | |||
2418 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | 1722 | while (entry->nr < PERF_MAX_STACK_DEPTH) { |
1723 | unsigned long bytes; | ||
2419 | frame.next_frame = NULL; | 1724 | frame.next_frame = NULL; |
2420 | frame.return_address = 0; | 1725 | frame.return_address = 0; |
2421 | 1726 | ||
2422 | if (!copy_stack_frame(fp, &frame)) | 1727 | bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); |
1728 | if (bytes != sizeof(frame)) | ||
2423 | break; | 1729 | break; |
2424 | 1730 | ||
2425 | if ((unsigned long)fp < regs->sp) | 1731 | if ((unsigned long)fp < regs->sp) |
@@ -2440,9 +1746,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
2440 | 1746 | ||
2441 | is_user = user_mode(regs); | 1747 | is_user = user_mode(regs); |
2442 | 1748 | ||
2443 | if (!current || current->pid == 0) | ||
2444 | return; | ||
2445 | |||
2446 | if (is_user && current->state != TASK_RUNNING) | 1749 | if (is_user && current->state != TASK_RUNNING) |
2447 | return; | 1750 | return; |
2448 | 1751 | ||
@@ -2457,6 +1760,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
2457 | { | 1760 | { |
2458 | struct perf_callchain_entry *entry; | 1761 | struct perf_callchain_entry *entry; |
2459 | 1762 | ||
1763 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
1764 | /* TODO: We don't support guest os callchain now */ | ||
1765 | return NULL; | ||
1766 | } | ||
1767 | |||
2460 | if (in_nmi()) | 1768 | if (in_nmi()) |
2461 | entry = &__get_cpu_var(pmc_nmi_entry); | 1769 | entry = &__get_cpu_var(pmc_nmi_entry); |
2462 | else | 1770 | else |
@@ -2469,7 +1777,36 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
2469 | return entry; | 1777 | return entry; |
2470 | } | 1778 | } |
2471 | 1779 | ||
2472 | void hw_perf_event_setup_online(int cpu) | 1780 | unsigned long perf_instruction_pointer(struct pt_regs *regs) |
2473 | { | 1781 | { |
2474 | init_debug_store_on_cpu(cpu); | 1782 | unsigned long ip; |
1783 | |||
1784 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) | ||
1785 | ip = perf_guest_cbs->get_guest_ip(); | ||
1786 | else | ||
1787 | ip = instruction_pointer(regs); | ||
1788 | |||
1789 | return ip; | ||
1790 | } | ||
1791 | |||
1792 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
1793 | { | ||
1794 | int misc = 0; | ||
1795 | |||
1796 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
1797 | if (perf_guest_cbs->is_user_mode()) | ||
1798 | misc |= PERF_RECORD_MISC_GUEST_USER; | ||
1799 | else | ||
1800 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | ||
1801 | } else { | ||
1802 | if (user_mode(regs)) | ||
1803 | misc |= PERF_RECORD_MISC_USER; | ||
1804 | else | ||
1805 | misc |= PERF_RECORD_MISC_KERNEL; | ||
1806 | } | ||
1807 | |||
1808 | if (regs->flags & PERF_EFLAGS_EXACT) | ||
1809 | misc |= PERF_RECORD_MISC_EXACT_IP; | ||
1810 | |||
1811 | return misc; | ||
2475 | } | 1812 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c new file mode 100644 index 000000000000..c2897b7b4a3b --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -0,0 +1,420 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_AMD | ||
2 | |||
3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); | ||
4 | |||
5 | static __initconst const u64 amd_hw_cache_event_ids | ||
6 | [PERF_COUNT_HW_CACHE_MAX] | ||
7 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
9 | { | ||
10 | [ C(L1D) ] = { | ||
11 | [ C(OP_READ) ] = { | ||
12 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
13 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | ||
14 | }, | ||
15 | [ C(OP_WRITE) ] = { | ||
16 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ | ||
17 | [ C(RESULT_MISS) ] = 0, | ||
18 | }, | ||
19 | [ C(OP_PREFETCH) ] = { | ||
20 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | ||
21 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | ||
22 | }, | ||
23 | }, | ||
24 | [ C(L1I ) ] = { | ||
25 | [ C(OP_READ) ] = { | ||
26 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
27 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
28 | }, | ||
29 | [ C(OP_WRITE) ] = { | ||
30 | [ C(RESULT_ACCESS) ] = -1, | ||
31 | [ C(RESULT_MISS) ] = -1, | ||
32 | }, | ||
33 | [ C(OP_PREFETCH) ] = { | ||
34 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | ||
35 | [ C(RESULT_MISS) ] = 0, | ||
36 | }, | ||
37 | }, | ||
38 | [ C(LL ) ] = { | ||
39 | [ C(OP_READ) ] = { | ||
40 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | ||
41 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | ||
42 | }, | ||
43 | [ C(OP_WRITE) ] = { | ||
44 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | ||
45 | [ C(RESULT_MISS) ] = 0, | ||
46 | }, | ||
47 | [ C(OP_PREFETCH) ] = { | ||
48 | [ C(RESULT_ACCESS) ] = 0, | ||
49 | [ C(RESULT_MISS) ] = 0, | ||
50 | }, | ||
51 | }, | ||
52 | [ C(DTLB) ] = { | ||
53 | [ C(OP_READ) ] = { | ||
54 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
55 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ | ||
56 | }, | ||
57 | [ C(OP_WRITE) ] = { | ||
58 | [ C(RESULT_ACCESS) ] = 0, | ||
59 | [ C(RESULT_MISS) ] = 0, | ||
60 | }, | ||
61 | [ C(OP_PREFETCH) ] = { | ||
62 | [ C(RESULT_ACCESS) ] = 0, | ||
63 | [ C(RESULT_MISS) ] = 0, | ||
64 | }, | ||
65 | }, | ||
66 | [ C(ITLB) ] = { | ||
67 | [ C(OP_READ) ] = { | ||
68 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ | ||
69 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
70 | }, | ||
71 | [ C(OP_WRITE) ] = { | ||
72 | [ C(RESULT_ACCESS) ] = -1, | ||
73 | [ C(RESULT_MISS) ] = -1, | ||
74 | }, | ||
75 | [ C(OP_PREFETCH) ] = { | ||
76 | [ C(RESULT_ACCESS) ] = -1, | ||
77 | [ C(RESULT_MISS) ] = -1, | ||
78 | }, | ||
79 | }, | ||
80 | [ C(BPU ) ] = { | ||
81 | [ C(OP_READ) ] = { | ||
82 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
83 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
84 | }, | ||
85 | [ C(OP_WRITE) ] = { | ||
86 | [ C(RESULT_ACCESS) ] = -1, | ||
87 | [ C(RESULT_MISS) ] = -1, | ||
88 | }, | ||
89 | [ C(OP_PREFETCH) ] = { | ||
90 | [ C(RESULT_ACCESS) ] = -1, | ||
91 | [ C(RESULT_MISS) ] = -1, | ||
92 | }, | ||
93 | }, | ||
94 | }; | ||
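The table above is indexed by (cache, operation, result); a zero entry means the combination is not counted and -1 means it is not supported, which the generic setup code turns into an error. A cut-down sketch of that lookup using only the L1D rows (the three event codes are the ones listed above; the enum names and helper are local to the sketch):

        #include <stdio.h>
        #include <stdint.h>

        enum { OP_READ, OP_WRITE, OP_PREFETCH, OP_MAX };
        enum { RES_ACCESS, RES_MISS, RES_MAX };

        /* L1D slice of the table above; 0 = not counted, -1 = not supported */
        static const int64_t l1d_events[OP_MAX][RES_MAX] = {
                [OP_READ]     = { 0x0040, 0x0041 },     /* accesses, misses */
                [OP_WRITE]    = { 0x0142, 0 },
                [OP_PREFETCH] = { 0x0267, 0x0167 },
        };

        static int64_t cache_event_config(int op, int result)
        {
                int64_t config = l1d_events[op][result];

                if (config == -1)
                        return -1;      /* -EINVAL in the real code */
                return config;          /* 0 simply counts nothing */
        }

        int main(void)
        {
                printf("L1D read misses  -> %#llx\n",
                       (unsigned long long)cache_event_config(OP_READ, RES_MISS));
                printf("L1D write misses -> %#llx\n",
                       (unsigned long long)cache_event_config(OP_WRITE, RES_MISS));
                return 0;
        }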
95 | |||
96 | /* | ||
97 | * AMD Performance Monitor K7 and later. | ||
98 | */ | ||
99 | static const u64 amd_perfmon_event_map[] = | ||
100 | { | ||
101 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
102 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
103 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
104 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
105 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, | ||
106 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, | ||
107 | }; | ||
108 | |||
109 | static u64 amd_pmu_event_map(int hw_event) | ||
110 | { | ||
111 | return amd_perfmon_event_map[hw_event]; | ||
112 | } | ||
113 | |||
114 | static int amd_pmu_hw_config(struct perf_event *event) | ||
115 | { | ||
116 | int ret = x86_pmu_hw_config(event); | ||
117 | |||
118 | if (ret) | ||
119 | return ret; | ||
120 | |||
121 | if (event->attr.type != PERF_TYPE_RAW) | ||
122 | return 0; | ||
123 | |||
124 | event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; | ||
125 | |||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * AMD64 events are detected based on their event codes. | ||
131 | */ | ||
132 | static inline int amd_is_nb_event(struct hw_perf_event *hwc) | ||
133 | { | ||
134 | return (hwc->config & 0xe0) == 0xe0; | ||
135 | } | ||
136 | |||
137 | static inline int amd_has_nb(struct cpu_hw_events *cpuc) | ||
138 | { | ||
139 | struct amd_nb *nb = cpuc->amd_nb; | ||
140 | |||
141 | return nb && nb->nb_id != -1; | ||
142 | } | ||
143 | |||
144 | static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | ||
145 | struct perf_event *event) | ||
146 | { | ||
147 | struct hw_perf_event *hwc = &event->hw; | ||
148 | struct amd_nb *nb = cpuc->amd_nb; | ||
149 | int i; | ||
150 | |||
151 | /* | ||
152 | * only care about NB events | ||
153 | */ | ||
154 | if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) | ||
155 | return; | ||
156 | |||
157 | /* | ||
158 | * need to scan the whole list because the event may not have | ||
159 | * been assigned during scheduling | ||
160 | * | ||
161 | * no race condition possible because event can only | ||
162 | * be removed on one CPU at a time AND PMU is disabled | ||
163 | * when we come here | ||
164 | */ | ||
165 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
166 | if (nb->owners[i] == event) { | ||
167 | cmpxchg(nb->owners+i, event, NULL); | ||
168 | break; | ||
169 | } | ||
170 | } | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * AMD64 NorthBridge events need special treatment because | ||
175 | * counter access needs to be synchronized across all cores | ||
176 | * of a package. Refer to BKDG section 3.12 | ||
177 | * | ||
178 | * NB events are events measuring L3 cache and HyperTransport | ||
179 | * traffic. They are identified by an event code >= 0xe00. | ||
180 | * They measure events on the NorthBridge, which is shared | ||
181 | * by all cores on a package. NB events are counted on a | ||
182 | * shared set of counters. When a NB event is programmed | ||
183 | * in a counter, the data actually comes from a shared | ||
184 | * counter. Thus, access to those counters needs to be | ||
185 | * synchronized. | ||
186 | * | ||
187 | * We implement the synchronization such that no two cores | ||
188 | * can be measuring NB events using the same counters. Thus, | ||
189 | * we maintain a per-NB allocation table. The available slot | ||
190 | * is propagated using the event_constraint structure. | ||
191 | * | ||
192 | * We provide only one choice for each NB event based on | ||
193 | * the fact that only NB events have restrictions. Consequently, | ||
194 | * if a counter is available, there is a guarantee the NB event | ||
195 | * will be assigned to it. If no slot is available, an empty | ||
196 | * constraint is returned and scheduling will eventually fail | ||
197 | * for this event. | ||
198 | * | ||
199 | * Note that all cores attached to the same NB compete for the same | ||
200 | * counters to host NB events; this is why we use atomic ops. Some | ||
201 | * multi-chip CPUs may have more than one NB. | ||
202 | * | ||
203 | * Given that resources are allocated (cmpxchg), they must be | ||
204 | * eventually freed for others to use. This is accomplished by | ||
205 | * calling amd_put_event_constraints(). | ||
206 | * | ||
207 | * Non NB events are not impacted by this restriction. | ||
208 | */ | ||
209 | static struct event_constraint * | ||
210 | amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
211 | { | ||
212 | struct hw_perf_event *hwc = &event->hw; | ||
213 | struct amd_nb *nb = cpuc->amd_nb; | ||
214 | struct perf_event *old = NULL; | ||
215 | int max = x86_pmu.num_counters; | ||
216 | int i, j, k = -1; | ||
217 | |||
218 | /* | ||
219 | * if not NB event or no NB, then no constraints | ||
220 | */ | ||
221 | if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) | ||
222 | return &unconstrained; | ||
223 | |||
224 | /* | ||
225 | * detect if already present, if so reuse | ||
226 | * | ||
227 | * cannot merge with actual allocation | ||
228 | * because of possible holes | ||
229 | * | ||
230 | * event can already be present yet not assigned (in hwc->idx) | ||
231 | * because of successive calls to x86_schedule_events() from | ||
232 | * hw_perf_group_sched_in() without hw_perf_enable() | ||
233 | */ | ||
234 | for (i = 0; i < max; i++) { | ||
235 | /* | ||
236 | * keep track of first free slot | ||
237 | */ | ||
238 | if (k == -1 && !nb->owners[i]) | ||
239 | k = i; | ||
240 | |||
241 | /* already present, reuse */ | ||
242 | if (nb->owners[i] == event) | ||
243 | goto done; | ||
244 | } | ||
245 | /* | ||
246 | * not present, so grab a new slot | ||
247 | * starting either at: | ||
248 | */ | ||
249 | if (hwc->idx != -1) { | ||
250 | /* previous assignment */ | ||
251 | i = hwc->idx; | ||
252 | } else if (k != -1) { | ||
253 | /* start from free slot found */ | ||
254 | i = k; | ||
255 | } else { | ||
256 | /* | ||
257 | * event not found, no slot found in | ||
258 | * first pass, try again from the | ||
259 | * beginning | ||
260 | */ | ||
261 | i = 0; | ||
262 | } | ||
263 | j = i; | ||
264 | do { | ||
265 | old = cmpxchg(nb->owners+i, NULL, event); | ||
266 | if (!old) | ||
267 | break; | ||
268 | if (++i == max) | ||
269 | i = 0; | ||
270 | } while (i != j); | ||
271 | done: | ||
272 | if (!old) | ||
273 | return &nb->event_constraints[i]; | ||
274 | |||
275 | return &emptyconstraint; | ||
276 | } | ||
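The comment before amd_get_event_constraints() describes the scheme: one owner slot per shared northbridge counter, claimed with cmpxchg so that no two cores count an NB event on the same shared counter, and released again in amd_put_event_constraints(). A stand-alone sketch of that claim/release protocol using the GCC atomic builtins (the counter count and event handles are made up for the example):

        #include <stdio.h>
        #include <stddef.h>

        #define NB_COUNTERS 4

        /* One owner slot per shared northbridge counter. */
        static void *nb_owners[NB_COUNTERS];

        /* Claim a free slot for 'event'; return its index or -1 (emptyconstraint). */
        static int nb_claim(void *event, int hint)
        {
                int i = (hint >= 0 && hint < NB_COUNTERS) ? hint : 0;
                int j = i;

                do {
                        void *expected = NULL;

                        /* same idea as cmpxchg(nb->owners + i, NULL, event) */
                        if (__atomic_compare_exchange_n(&nb_owners[i], &expected,
                                                        event, 0,
                                                        __ATOMIC_SEQ_CST,
                                                        __ATOMIC_SEQ_CST))
                                return i;
                        if (expected == event)          /* already ours: reuse */
                                return i;
                        if (++i == NB_COUNTERS)
                                i = 0;
                } while (i != j);

                return -1;
        }

        /* Release whatever slot 'event' owns, if any. */
        static void nb_release(void *event)
        {
                for (int i = 0; i < NB_COUNTERS; i++) {
                        void *expected = event;

                        if (__atomic_compare_exchange_n(&nb_owners[i], &expected,
                                                        NULL, 0,
                                                        __ATOMIC_SEQ_CST,
                                                        __ATOMIC_SEQ_CST))
                                break;
                }
        }

        int main(void)
        {
                int a = 0, b = 0;       /* stand-ins for two NB events */

                printf("event a -> counter %d\n", nb_claim(&a, -1));
                printf("event b -> counter %d\n", nb_claim(&b, -1));
                nb_release(&a);
                nb_release(&b);
                return 0;
        }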
277 | |||
278 | static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | ||
279 | { | ||
280 | struct amd_nb *nb; | ||
281 | int i; | ||
282 | |||
283 | nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); | ||
284 | if (!nb) | ||
285 | return NULL; | ||
286 | |||
287 | memset(nb, 0, sizeof(*nb)); | ||
288 | nb->nb_id = nb_id; | ||
289 | |||
290 | /* | ||
291 | * initialize all possible NB constraints | ||
292 | */ | ||
293 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
294 | __set_bit(i, nb->event_constraints[i].idxmsk); | ||
295 | nb->event_constraints[i].weight = 1; | ||
296 | } | ||
297 | return nb; | ||
298 | } | ||
299 | |||
300 | static int amd_pmu_cpu_prepare(int cpu) | ||
301 | { | ||
302 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
303 | |||
304 | WARN_ON_ONCE(cpuc->amd_nb); | ||
305 | |||
306 | if (boot_cpu_data.x86_max_cores < 2) | ||
307 | return NOTIFY_OK; | ||
308 | |||
309 | cpuc->amd_nb = amd_alloc_nb(cpu, -1); | ||
310 | if (!cpuc->amd_nb) | ||
311 | return NOTIFY_BAD; | ||
312 | |||
313 | return NOTIFY_OK; | ||
314 | } | ||
315 | |||
316 | static void amd_pmu_cpu_starting(int cpu) | ||
317 | { | ||
318 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
319 | struct amd_nb *nb; | ||
320 | int i, nb_id; | ||
321 | |||
322 | if (boot_cpu_data.x86_max_cores < 2) | ||
323 | return; | ||
324 | |||
325 | nb_id = amd_get_nb_id(cpu); | ||
326 | WARN_ON_ONCE(nb_id == BAD_APICID); | ||
327 | |||
328 | raw_spin_lock(&amd_nb_lock); | ||
329 | |||
330 | for_each_online_cpu(i) { | ||
331 | nb = per_cpu(cpu_hw_events, i).amd_nb; | ||
332 | if (WARN_ON_ONCE(!nb)) | ||
333 | continue; | ||
334 | |||
335 | if (nb->nb_id == nb_id) { | ||
336 | kfree(cpuc->amd_nb); | ||
337 | cpuc->amd_nb = nb; | ||
338 | break; | ||
339 | } | ||
340 | } | ||
341 | |||
342 | cpuc->amd_nb->nb_id = nb_id; | ||
343 | cpuc->amd_nb->refcnt++; | ||
344 | |||
345 | raw_spin_unlock(&amd_nb_lock); | ||
346 | } | ||
347 | |||
348 | static void amd_pmu_cpu_dead(int cpu) | ||
349 | { | ||
350 | struct cpu_hw_events *cpuhw; | ||
351 | |||
352 | if (boot_cpu_data.x86_max_cores < 2) | ||
353 | return; | ||
354 | |||
355 | cpuhw = &per_cpu(cpu_hw_events, cpu); | ||
356 | |||
357 | raw_spin_lock(&amd_nb_lock); | ||
358 | |||
359 | if (cpuhw->amd_nb) { | ||
360 | struct amd_nb *nb = cpuhw->amd_nb; | ||
361 | |||
362 | if (nb->nb_id == -1 || --nb->refcnt == 0) | ||
363 | kfree(nb); | ||
364 | |||
365 | cpuhw->amd_nb = NULL; | ||
366 | } | ||
367 | |||
368 | raw_spin_unlock(&amd_nb_lock); | ||
369 | } | ||
370 | |||
371 | static __initconst const struct x86_pmu amd_pmu = { | ||
372 | .name = "AMD", | ||
373 | .handle_irq = x86_pmu_handle_irq, | ||
374 | .disable_all = x86_pmu_disable_all, | ||
375 | .enable_all = x86_pmu_enable_all, | ||
376 | .enable = x86_pmu_enable_event, | ||
377 | .disable = x86_pmu_disable_event, | ||
378 | .hw_config = amd_pmu_hw_config, | ||
379 | .schedule_events = x86_schedule_events, | ||
380 | .eventsel = MSR_K7_EVNTSEL0, | ||
381 | .perfctr = MSR_K7_PERFCTR0, | ||
382 | .event_map = amd_pmu_event_map, | ||
383 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
384 | .num_counters = 4, | ||
385 | .cntval_bits = 48, | ||
386 | .cntval_mask = (1ULL << 48) - 1, | ||
387 | .apic = 1, | ||
388 | /* use highest bit to detect overflow */ | ||
389 | .max_period = (1ULL << 47) - 1, | ||
390 | .get_event_constraints = amd_get_event_constraints, | ||
391 | .put_event_constraints = amd_put_event_constraints, | ||
392 | |||
393 | .cpu_prepare = amd_pmu_cpu_prepare, | ||
394 | .cpu_starting = amd_pmu_cpu_starting, | ||
395 | .cpu_dead = amd_pmu_cpu_dead, | ||
396 | }; | ||
397 | |||
398 | static __init int amd_pmu_init(void) | ||
399 | { | ||
400 | /* Performance-monitoring supported from K7 and later: */ | ||
401 | if (boot_cpu_data.x86 < 6) | ||
402 | return -ENODEV; | ||
403 | |||
404 | x86_pmu = amd_pmu; | ||
405 | |||
406 | /* Events are common for all AMDs */ | ||
407 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | ||
408 | sizeof(hw_cache_event_ids)); | ||
409 | |||
410 | return 0; | ||
411 | } | ||
412 | |||
413 | #else /* CONFIG_CPU_SUP_AMD */ | ||
414 | |||
415 | static int amd_pmu_init(void) | ||
416 | { | ||
417 | return 0; | ||
418 | } | ||
419 | |||
420 | #endif | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c new file mode 100644 index 000000000000..ee05c90012d2 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -0,0 +1,1056 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* | ||
4 | * Intel PerfMon, used on Core and later. | ||
5 | */ | ||
6 | static const u64 intel_perfmon_event_map[] = | ||
7 | { | ||
8 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
9 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
10 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
11 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
12 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
13 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
14 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
15 | }; | ||
16 | |||
17 | static struct event_constraint intel_core_event_constraints[] = | ||
18 | { | ||
19 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
20 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
21 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
22 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
23 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
24 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ | ||
25 | EVENT_CONSTRAINT_END | ||
26 | }; | ||
27 | |||
28 | static struct event_constraint intel_core2_event_constraints[] = | ||
29 | { | ||
30 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
31 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
32 | /* | ||
33 | * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event | ||
34 | * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed | ||
35 | * ratio between these counters. | ||
36 | */ | ||
37 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
38 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
39 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
40 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
41 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
42 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
43 | INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
44 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
45 | INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
46 | INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */ | ||
47 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
48 | EVENT_CONSTRAINT_END | ||
49 | }; | ||
50 | |||
51 | static struct event_constraint intel_nehalem_event_constraints[] = | ||
52 | { | ||
53 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
54 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
55 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
56 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
57 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
58 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
59 | INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
60 | INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ | ||
61 | INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
62 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
63 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
64 | EVENT_CONSTRAINT_END | ||
65 | }; | ||
66 | |||
67 | static struct event_constraint intel_westmere_event_constraints[] = | ||
68 | { | ||
69 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
70 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
71 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
72 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
73 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ | ||
74 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
75 | INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */ | ||
76 | EVENT_CONSTRAINT_END | ||
77 | }; | ||
78 | |||
79 | static struct event_constraint intel_gen_event_constraints[] = | ||
80 | { | ||
81 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
82 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
83 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
84 | EVENT_CONSTRAINT_END | ||
85 | }; | ||
86 | |||
87 | static u64 intel_pmu_event_map(int hw_event) | ||
88 | { | ||
89 | return intel_perfmon_event_map[hw_event]; | ||
90 | } | ||
91 | |||
92 | static __initconst const u64 westmere_hw_cache_event_ids | ||
93 | [PERF_COUNT_HW_CACHE_MAX] | ||
94 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
95 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
96 | { | ||
97 | [ C(L1D) ] = { | ||
98 | [ C(OP_READ) ] = { | ||
99 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
100 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ | ||
101 | }, | ||
102 | [ C(OP_WRITE) ] = { | ||
103 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ | ||
104 | [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ | ||
105 | }, | ||
106 | [ C(OP_PREFETCH) ] = { | ||
107 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
108 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
109 | }, | ||
110 | }, | ||
111 | [ C(L1I ) ] = { | ||
112 | [ C(OP_READ) ] = { | ||
113 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
114 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
115 | }, | ||
116 | [ C(OP_WRITE) ] = { | ||
117 | [ C(RESULT_ACCESS) ] = -1, | ||
118 | [ C(RESULT_MISS) ] = -1, | ||
119 | }, | ||
120 | [ C(OP_PREFETCH) ] = { | ||
121 | [ C(RESULT_ACCESS) ] = 0x0, | ||
122 | [ C(RESULT_MISS) ] = 0x0, | ||
123 | }, | ||
124 | }, | ||
125 | [ C(LL ) ] = { | ||
126 | [ C(OP_READ) ] = { | ||
127 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
128 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
129 | }, | ||
130 | [ C(OP_WRITE) ] = { | ||
131 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
132 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
133 | }, | ||
134 | [ C(OP_PREFETCH) ] = { | ||
135 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
136 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
137 | }, | ||
138 | }, | ||
139 | [ C(DTLB) ] = { | ||
140 | [ C(OP_READ) ] = { | ||
141 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
142 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
143 | }, | ||
144 | [ C(OP_WRITE) ] = { | ||
145 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ | ||
146 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
147 | }, | ||
148 | [ C(OP_PREFETCH) ] = { | ||
149 | [ C(RESULT_ACCESS) ] = 0x0, | ||
150 | [ C(RESULT_MISS) ] = 0x0, | ||
151 | }, | ||
152 | }, | ||
153 | [ C(ITLB) ] = { | ||
154 | [ C(OP_READ) ] = { | ||
155 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
156 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ | ||
157 | }, | ||
158 | [ C(OP_WRITE) ] = { | ||
159 | [ C(RESULT_ACCESS) ] = -1, | ||
160 | [ C(RESULT_MISS) ] = -1, | ||
161 | }, | ||
162 | [ C(OP_PREFETCH) ] = { | ||
163 | [ C(RESULT_ACCESS) ] = -1, | ||
164 | [ C(RESULT_MISS) ] = -1, | ||
165 | }, | ||
166 | }, | ||
167 | [ C(BPU ) ] = { | ||
168 | [ C(OP_READ) ] = { | ||
169 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
170 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
171 | }, | ||
172 | [ C(OP_WRITE) ] = { | ||
173 | [ C(RESULT_ACCESS) ] = -1, | ||
174 | [ C(RESULT_MISS) ] = -1, | ||
175 | }, | ||
176 | [ C(OP_PREFETCH) ] = { | ||
177 | [ C(RESULT_ACCESS) ] = -1, | ||
178 | [ C(RESULT_MISS) ] = -1, | ||
179 | }, | ||
180 | }, | ||
181 | }; | ||
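The three-dimensional tables above are indexed by the generic (cache, op, result) triple that perf packs into attr.config for PERF_TYPE_HW_CACHE events. The sketch below is illustrative only and not part of this patch; it assumes the standard perf_event_open(2) encoding of id | (op << 8) | (result << 16), which is how a request ends up selecting, for example, 0x0151 (L1D.REPL) for L1D read misses on Westmere.

    /* Hedged sketch: generic cache-event encoding as consumed by these tables. */
    #include <linux/perf_event.h>
    #include <stdint.h>

    static uint64_t hw_cache_config(unsigned int id, unsigned int op,
                                    unsigned int result)
    {
            /* perf_event_open(2): config = id | (op << 8) | (result << 16) */
            return (uint64_t)id | ((uint64_t)op << 8) | ((uint64_t)result << 16);
    }

    /* L1D read misses -> westmere_hw_cache_event_ids[L1D][OP_READ][RESULT_MISS]:
     * hw_cache_config(PERF_COUNT_HW_CACHE_L1D, PERF_COUNT_HW_CACHE_OP_READ,
     *                 PERF_COUNT_HW_CACHE_RESULT_MISS);
     */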
182 | |||
183 | static __initconst const u64 nehalem_hw_cache_event_ids | ||
184 | [PERF_COUNT_HW_CACHE_MAX] | ||
185 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
186 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
187 | { | ||
188 | [ C(L1D) ] = { | ||
189 | [ C(OP_READ) ] = { | ||
190 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
191 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
192 | }, | ||
193 | [ C(OP_WRITE) ] = { | ||
194 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
195 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
196 | }, | ||
197 | [ C(OP_PREFETCH) ] = { | ||
198 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
199 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
200 | }, | ||
201 | }, | ||
202 | [ C(L1I ) ] = { | ||
203 | [ C(OP_READ) ] = { | ||
204 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
205 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
206 | }, | ||
207 | [ C(OP_WRITE) ] = { | ||
208 | [ C(RESULT_ACCESS) ] = -1, | ||
209 | [ C(RESULT_MISS) ] = -1, | ||
210 | }, | ||
211 | [ C(OP_PREFETCH) ] = { | ||
212 | [ C(RESULT_ACCESS) ] = 0x0, | ||
213 | [ C(RESULT_MISS) ] = 0x0, | ||
214 | }, | ||
215 | }, | ||
216 | [ C(LL ) ] = { | ||
217 | [ C(OP_READ) ] = { | ||
218 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
219 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
220 | }, | ||
221 | [ C(OP_WRITE) ] = { | ||
222 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
223 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
224 | }, | ||
225 | [ C(OP_PREFETCH) ] = { | ||
226 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
227 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
228 | }, | ||
229 | }, | ||
230 | [ C(DTLB) ] = { | ||
231 | [ C(OP_READ) ] = { | ||
232 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
233 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
234 | }, | ||
235 | [ C(OP_WRITE) ] = { | ||
236 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
237 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
238 | }, | ||
239 | [ C(OP_PREFETCH) ] = { | ||
240 | [ C(RESULT_ACCESS) ] = 0x0, | ||
241 | [ C(RESULT_MISS) ] = 0x0, | ||
242 | }, | ||
243 | }, | ||
244 | [ C(ITLB) ] = { | ||
245 | [ C(OP_READ) ] = { | ||
246 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
247 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
248 | }, | ||
249 | [ C(OP_WRITE) ] = { | ||
250 | [ C(RESULT_ACCESS) ] = -1, | ||
251 | [ C(RESULT_MISS) ] = -1, | ||
252 | }, | ||
253 | [ C(OP_PREFETCH) ] = { | ||
254 | [ C(RESULT_ACCESS) ] = -1, | ||
255 | [ C(RESULT_MISS) ] = -1, | ||
256 | }, | ||
257 | }, | ||
258 | [ C(BPU ) ] = { | ||
259 | [ C(OP_READ) ] = { | ||
260 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
261 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
262 | }, | ||
263 | [ C(OP_WRITE) ] = { | ||
264 | [ C(RESULT_ACCESS) ] = -1, | ||
265 | [ C(RESULT_MISS) ] = -1, | ||
266 | }, | ||
267 | [ C(OP_PREFETCH) ] = { | ||
268 | [ C(RESULT_ACCESS) ] = -1, | ||
269 | [ C(RESULT_MISS) ] = -1, | ||
270 | }, | ||
271 | }, | ||
272 | }; | ||
273 | |||
274 | static __initconst const u64 core2_hw_cache_event_ids | ||
275 | [PERF_COUNT_HW_CACHE_MAX] | ||
276 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
277 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
278 | { | ||
279 | [ C(L1D) ] = { | ||
280 | [ C(OP_READ) ] = { | ||
281 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
282 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
283 | }, | ||
284 | [ C(OP_WRITE) ] = { | ||
285 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
286 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
287 | }, | ||
288 | [ C(OP_PREFETCH) ] = { | ||
289 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
290 | [ C(RESULT_MISS) ] = 0, | ||
291 | }, | ||
292 | }, | ||
293 | [ C(L1I ) ] = { | ||
294 | [ C(OP_READ) ] = { | ||
295 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
296 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
297 | }, | ||
298 | [ C(OP_WRITE) ] = { | ||
299 | [ C(RESULT_ACCESS) ] = -1, | ||
300 | [ C(RESULT_MISS) ] = -1, | ||
301 | }, | ||
302 | [ C(OP_PREFETCH) ] = { | ||
303 | [ C(RESULT_ACCESS) ] = 0, | ||
304 | [ C(RESULT_MISS) ] = 0, | ||
305 | }, | ||
306 | }, | ||
307 | [ C(LL ) ] = { | ||
308 | [ C(OP_READ) ] = { | ||
309 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
310 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
311 | }, | ||
312 | [ C(OP_WRITE) ] = { | ||
313 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
314 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
315 | }, | ||
316 | [ C(OP_PREFETCH) ] = { | ||
317 | [ C(RESULT_ACCESS) ] = 0, | ||
318 | [ C(RESULT_MISS) ] = 0, | ||
319 | }, | ||
320 | }, | ||
321 | [ C(DTLB) ] = { | ||
322 | [ C(OP_READ) ] = { | ||
323 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
324 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
325 | }, | ||
326 | [ C(OP_WRITE) ] = { | ||
327 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
328 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
329 | }, | ||
330 | [ C(OP_PREFETCH) ] = { | ||
331 | [ C(RESULT_ACCESS) ] = 0, | ||
332 | [ C(RESULT_MISS) ] = 0, | ||
333 | }, | ||
334 | }, | ||
335 | [ C(ITLB) ] = { | ||
336 | [ C(OP_READ) ] = { | ||
337 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
338 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
339 | }, | ||
340 | [ C(OP_WRITE) ] = { | ||
341 | [ C(RESULT_ACCESS) ] = -1, | ||
342 | [ C(RESULT_MISS) ] = -1, | ||
343 | }, | ||
344 | [ C(OP_PREFETCH) ] = { | ||
345 | [ C(RESULT_ACCESS) ] = -1, | ||
346 | [ C(RESULT_MISS) ] = -1, | ||
347 | }, | ||
348 | }, | ||
349 | [ C(BPU ) ] = { | ||
350 | [ C(OP_READ) ] = { | ||
351 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
352 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
353 | }, | ||
354 | [ C(OP_WRITE) ] = { | ||
355 | [ C(RESULT_ACCESS) ] = -1, | ||
356 | [ C(RESULT_MISS) ] = -1, | ||
357 | }, | ||
358 | [ C(OP_PREFETCH) ] = { | ||
359 | [ C(RESULT_ACCESS) ] = -1, | ||
360 | [ C(RESULT_MISS) ] = -1, | ||
361 | }, | ||
362 | }, | ||
363 | }; | ||
364 | |||
365 | static __initconst const u64 atom_hw_cache_event_ids | ||
366 | [PERF_COUNT_HW_CACHE_MAX] | ||
367 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
368 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
369 | { | ||
370 | [ C(L1D) ] = { | ||
371 | [ C(OP_READ) ] = { | ||
372 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
373 | [ C(RESULT_MISS) ] = 0, | ||
374 | }, | ||
375 | [ C(OP_WRITE) ] = { | ||
376 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
377 | [ C(RESULT_MISS) ] = 0, | ||
378 | }, | ||
379 | [ C(OP_PREFETCH) ] = { | ||
380 | [ C(RESULT_ACCESS) ] = 0x0, | ||
381 | [ C(RESULT_MISS) ] = 0, | ||
382 | }, | ||
383 | }, | ||
384 | [ C(L1I ) ] = { | ||
385 | [ C(OP_READ) ] = { | ||
386 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
387 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
388 | }, | ||
389 | [ C(OP_WRITE) ] = { | ||
390 | [ C(RESULT_ACCESS) ] = -1, | ||
391 | [ C(RESULT_MISS) ] = -1, | ||
392 | }, | ||
393 | [ C(OP_PREFETCH) ] = { | ||
394 | [ C(RESULT_ACCESS) ] = 0, | ||
395 | [ C(RESULT_MISS) ] = 0, | ||
396 | }, | ||
397 | }, | ||
398 | [ C(LL ) ] = { | ||
399 | [ C(OP_READ) ] = { | ||
400 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
401 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
402 | }, | ||
403 | [ C(OP_WRITE) ] = { | ||
404 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
405 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
406 | }, | ||
407 | [ C(OP_PREFETCH) ] = { | ||
408 | [ C(RESULT_ACCESS) ] = 0, | ||
409 | [ C(RESULT_MISS) ] = 0, | ||
410 | }, | ||
411 | }, | ||
412 | [ C(DTLB) ] = { | ||
413 | [ C(OP_READ) ] = { | ||
414 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
415 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
416 | }, | ||
417 | [ C(OP_WRITE) ] = { | ||
418 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
419 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
420 | }, | ||
421 | [ C(OP_PREFETCH) ] = { | ||
422 | [ C(RESULT_ACCESS) ] = 0, | ||
423 | [ C(RESULT_MISS) ] = 0, | ||
424 | }, | ||
425 | }, | ||
426 | [ C(ITLB) ] = { | ||
427 | [ C(OP_READ) ] = { | ||
428 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
429 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
430 | }, | ||
431 | [ C(OP_WRITE) ] = { | ||
432 | [ C(RESULT_ACCESS) ] = -1, | ||
433 | [ C(RESULT_MISS) ] = -1, | ||
434 | }, | ||
435 | [ C(OP_PREFETCH) ] = { | ||
436 | [ C(RESULT_ACCESS) ] = -1, | ||
437 | [ C(RESULT_MISS) ] = -1, | ||
438 | }, | ||
439 | }, | ||
440 | [ C(BPU ) ] = { | ||
441 | [ C(OP_READ) ] = { | ||
442 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
443 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
444 | }, | ||
445 | [ C(OP_WRITE) ] = { | ||
446 | [ C(RESULT_ACCESS) ] = -1, | ||
447 | [ C(RESULT_MISS) ] = -1, | ||
448 | }, | ||
449 | [ C(OP_PREFETCH) ] = { | ||
450 | [ C(RESULT_ACCESS) ] = -1, | ||
451 | [ C(RESULT_MISS) ] = -1, | ||
452 | }, | ||
453 | }, | ||
454 | }; | ||
455 | |||
456 | static void intel_pmu_disable_all(void) | ||
457 | { | ||
458 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
459 | |||
460 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | ||
461 | |||
462 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
463 | intel_pmu_disable_bts(); | ||
464 | |||
465 | intel_pmu_pebs_disable_all(); | ||
466 | intel_pmu_lbr_disable_all(); | ||
467 | } | ||
468 | |||
469 | static void intel_pmu_enable_all(int added) | ||
470 | { | ||
471 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
472 | |||
473 | intel_pmu_pebs_enable_all(); | ||
474 | intel_pmu_lbr_enable_all(); | ||
475 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
476 | |||
477 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
478 | struct perf_event *event = | ||
479 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
480 | |||
481 | if (WARN_ON_ONCE(!event)) | ||
482 | return; | ||
483 | |||
484 | intel_pmu_enable_bts(event->hw.config); | ||
485 | } | ||
486 | } | ||
487 | |||
488 | /* | ||
489 | * Workaround for: | ||
490 | * Intel Errata AAK100 (model 26) | ||
491 | * Intel Errata AAP53 (model 30) | ||
492 | * Intel Errata BD53 (model 44) | ||
493 | * | ||
494 | * The official story: | ||
495 | * These chips need to be 'reset' when adding counters by programming the | ||
496 | * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either | ||
497 | * in sequence on the same PMC or on different PMCs. | ||
498 | * | ||
499 | * In practice it appears some of these events do in fact count, and | ||
500 | * we need to program all 4 events. | ||
501 | */ | ||
502 | static void intel_pmu_nhm_workaround(void) | ||
503 | { | ||
504 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
505 | static const unsigned long nhm_magic[4] = { | ||
506 | 0x4300B5, | ||
507 | 0x4300D2, | ||
508 | 0x4300B1, | ||
509 | 0x4300B1 | ||
510 | }; | ||
511 | struct perf_event *event; | ||
512 | int i; | ||
513 | |||
514 | /* | ||
515 | * The erratum requires the steps below: | ||
516 | * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL; | ||
517 | * 2) Configure 4 PERFEVTSELx with the magic events and clear | ||
518 | * the corresponding PMCx; | ||
519 | * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL; | ||
520 | * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL; | ||
521 | * 5) Clear 4 pairs of PERFEVTSELx and PMCx; | ||
522 | */ | ||
523 | |||
524 | /* | ||
525 | * The real steps we choose are a little different from above. | ||
526 | * A) To reduce MSR operations, we don't run step 1) as they | ||
527 | * are already cleared before this function is called; | ||
528 | * B) Call x86_perf_event_update to save PMCx before configuring | ||
529 | * PERFEVTSELx with magic number; | ||
530 | * C) For step 5), we clear a PERFEVTSELx only when it is | ||
531 | * not currently in use; | ||
532 | * D) Call x86_perf_event_set_period to restore PMCx; | ||
533 | */ | ||
534 | |||
535 | /* We always operate on 4 pairs of PERF counters */ | ||
536 | for (i = 0; i < 4; i++) { | ||
537 | event = cpuc->events[i]; | ||
538 | if (event) | ||
539 | x86_perf_event_update(event); | ||
540 | } | ||
541 | |||
542 | for (i = 0; i < 4; i++) { | ||
543 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); | ||
544 | wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); | ||
545 | } | ||
546 | |||
547 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); | ||
548 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); | ||
549 | |||
550 | for (i = 0; i < 4; i++) { | ||
551 | event = cpuc->events[i]; | ||
552 | |||
553 | if (event) { | ||
554 | x86_perf_event_set_period(event); | ||
555 | __x86_pmu_enable_event(&event->hw, | ||
556 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
557 | } else | ||
558 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); | ||
559 | } | ||
560 | } | ||
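For illustration only (not part of the patch): the magic values programmed above decompose into ordinary architectural PERFEVTSEL fields, assuming the standard layout of event select in bits 0-7, unit mask in bits 8-15, USR at bit 16, OS at bit 17 and EN at bit 22.

    /* Hedged sketch: decomposition of the 0x4300xx magic values. */
    #define SKETCH_EVTSEL_USR	(1ULL << 16)
    #define SKETCH_EVTSEL_OS	(1ULL << 17)
    #define SKETCH_EVTSEL_EN	(1ULL << 22)

    static inline unsigned long long sketch_magic(unsigned int event)
    {
            /* enable bit, count in ring 0 and ring 3, unit mask 0x00 */
            return SKETCH_EVTSEL_EN | SKETCH_EVTSEL_OS | SKETCH_EVTSEL_USR | event;
    }

    /* sketch_magic(0xB5) == 0x4300B5, sketch_magic(0xD2) == 0x4300D2,
     * sketch_magic(0xB1) == 0x4300B1 */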
561 | |||
562 | static void intel_pmu_nhm_enable_all(int added) | ||
563 | { | ||
564 | if (added) | ||
565 | intel_pmu_nhm_workaround(); | ||
566 | intel_pmu_enable_all(added); | ||
567 | } | ||
568 | |||
569 | static inline u64 intel_pmu_get_status(void) | ||
570 | { | ||
571 | u64 status; | ||
572 | |||
573 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
574 | |||
575 | return status; | ||
576 | } | ||
577 | |||
578 | static inline void intel_pmu_ack_status(u64 ack) | ||
579 | { | ||
580 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | ||
581 | } | ||
582 | |||
583 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) | ||
584 | { | ||
585 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | ||
586 | u64 ctrl_val, mask; | ||
587 | |||
588 | mask = 0xfULL << (idx * 4); | ||
589 | |||
590 | rdmsrl(hwc->config_base, ctrl_val); | ||
591 | ctrl_val &= ~mask; | ||
592 | wrmsrl(hwc->config_base, ctrl_val); | ||
593 | } | ||
594 | |||
595 | static void intel_pmu_disable_event(struct perf_event *event) | ||
596 | { | ||
597 | struct hw_perf_event *hwc = &event->hw; | ||
598 | |||
599 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | ||
600 | intel_pmu_disable_bts(); | ||
601 | intel_pmu_drain_bts_buffer(); | ||
602 | return; | ||
603 | } | ||
604 | |||
605 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
606 | intel_pmu_disable_fixed(hwc); | ||
607 | return; | ||
608 | } | ||
609 | |||
610 | x86_pmu_disable_event(event); | ||
611 | |||
612 | if (unlikely(event->attr.precise_ip)) | ||
613 | intel_pmu_pebs_disable(event); | ||
614 | } | ||
615 | |||
616 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) | ||
617 | { | ||
618 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | ||
619 | u64 ctrl_val, bits, mask; | ||
620 | |||
621 | /* | ||
622 | * Enable IRQ generation (0x8), | ||
623 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
624 | * if requested: | ||
625 | */ | ||
626 | bits = 0x8ULL; | ||
627 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
628 | bits |= 0x2; | ||
629 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
630 | bits |= 0x1; | ||
631 | |||
632 | /* | ||
633 | * ANY bit is supported in v3 and up | ||
634 | */ | ||
635 | if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) | ||
636 | bits |= 0x4; | ||
637 | |||
638 | bits <<= (idx * 4); | ||
639 | mask = 0xfULL << (idx * 4); | ||
640 | |||
641 | rdmsrl(hwc->config_base, ctrl_val); | ||
642 | ctrl_val &= ~mask; | ||
643 | ctrl_val |= bits; | ||
644 | wrmsrl(hwc->config_base, ctrl_val); | ||
645 | } | ||
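As a sketch (assuming only the per-counter field layout described in the comment above: 0x1 ring-0, 0x2 ring-3, 0x4 ANY on v3+, 0x8 PMI), the value written for fixed counter idx occupies the 4-bit field at bits [idx*4, idx*4+3] of MSR_ARCH_PERFMON_FIXED_CTR_CTRL.

    /* Hedged sketch of the field computed by intel_pmu_enable_fixed(). */
    static inline u64 sketch_fixed_ctrl_field(int os, int usr, int any, int pmi)
    {
            return (os  ? 0x1ULL : 0) |
                   (usr ? 0x2ULL : 0) |
                   (any ? 0x4ULL : 0) |
                   (pmi ? 0x8ULL : 0);
    }

    /* ctrl_val = (ctrl_val & ~(0xfULL << (idx * 4)))
     *          | (sketch_fixed_ctrl_field(1, 1, 0, 1) << (idx * 4)); */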
646 | |||
647 | static void intel_pmu_enable_event(struct perf_event *event) | ||
648 | { | ||
649 | struct hw_perf_event *hwc = &event->hw; | ||
650 | |||
651 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | ||
652 | if (!__get_cpu_var(cpu_hw_events).enabled) | ||
653 | return; | ||
654 | |||
655 | intel_pmu_enable_bts(hwc->config); | ||
656 | return; | ||
657 | } | ||
658 | |||
659 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
660 | intel_pmu_enable_fixed(hwc); | ||
661 | return; | ||
662 | } | ||
663 | |||
664 | if (unlikely(event->attr.precise_ip)) | ||
665 | intel_pmu_pebs_enable(event); | ||
666 | |||
667 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); | ||
668 | } | ||
669 | |||
670 | /* | ||
671 | * Save and restart an expired event. Called by NMI contexts, | ||
672 | * so it has to be careful about preempting normal event ops: | ||
673 | */ | ||
674 | static int intel_pmu_save_and_restart(struct perf_event *event) | ||
675 | { | ||
676 | x86_perf_event_update(event); | ||
677 | return x86_perf_event_set_period(event); | ||
678 | } | ||
679 | |||
680 | static void intel_pmu_reset(void) | ||
681 | { | ||
682 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; | ||
683 | unsigned long flags; | ||
684 | int idx; | ||
685 | |||
686 | if (!x86_pmu.num_counters) | ||
687 | return; | ||
688 | |||
689 | local_irq_save(flags); | ||
690 | |||
691 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
692 | |||
693 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
694 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | ||
695 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
696 | } | ||
697 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) | ||
698 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
699 | |||
700 | if (ds) | ||
701 | ds->bts_index = ds->bts_buffer_base; | ||
702 | |||
703 | local_irq_restore(flags); | ||
704 | } | ||
705 | |||
706 | /* | ||
707 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
708 | * rules apply: | ||
709 | */ | ||
710 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
711 | { | ||
712 | struct perf_sample_data data; | ||
713 | struct cpu_hw_events *cpuc; | ||
714 | int bit, loops; | ||
715 | u64 status; | ||
716 | int handled = 0; | ||
717 | |||
718 | perf_sample_data_init(&data, 0); | ||
719 | |||
720 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
721 | |||
722 | intel_pmu_disable_all(); | ||
723 | intel_pmu_drain_bts_buffer(); | ||
724 | status = intel_pmu_get_status(); | ||
725 | if (!status) { | ||
726 | intel_pmu_enable_all(0); | ||
727 | return 0; | ||
728 | } | ||
729 | |||
730 | loops = 0; | ||
731 | again: | ||
732 | intel_pmu_ack_status(status); | ||
733 | if (++loops > 100) { | ||
734 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | ||
735 | perf_event_print_debug(); | ||
736 | intel_pmu_reset(); | ||
737 | goto done; | ||
738 | } | ||
739 | |||
740 | inc_irq_stat(apic_perf_irqs); | ||
741 | |||
742 | intel_pmu_lbr_read(); | ||
743 | |||
744 | /* | ||
745 | * PEBS overflow sets bit 62 in the global status register | ||
746 | */ | ||
747 | if (__test_and_clear_bit(62, (unsigned long *)&status)) { | ||
748 | handled++; | ||
749 | x86_pmu.drain_pebs(regs); | ||
750 | } | ||
751 | |||
752 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
753 | struct perf_event *event = cpuc->events[bit]; | ||
754 | |||
755 | handled++; | ||
756 | |||
757 | if (!test_bit(bit, cpuc->active_mask)) | ||
758 | continue; | ||
759 | |||
760 | if (!intel_pmu_save_and_restart(event)) | ||
761 | continue; | ||
762 | |||
763 | data.period = event->hw.last_period; | ||
764 | |||
765 | if (perf_event_overflow(event, 1, &data, regs)) | ||
766 | x86_pmu_stop(event); | ||
767 | } | ||
768 | |||
769 | /* | ||
770 | * Repeat if there is more work to be done: | ||
771 | */ | ||
772 | status = intel_pmu_get_status(); | ||
773 | if (status) | ||
774 | goto again; | ||
775 | |||
776 | done: | ||
777 | intel_pmu_enable_all(0); | ||
778 | return handled; | ||
779 | } | ||
780 | |||
781 | static struct event_constraint * | ||
782 | intel_bts_constraints(struct perf_event *event) | ||
783 | { | ||
784 | struct hw_perf_event *hwc = &event->hw; | ||
785 | unsigned int hw_event, bts_event; | ||
786 | |||
787 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; | ||
788 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); | ||
789 | |||
790 | if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) | ||
791 | return &bts_constraint; | ||
792 | |||
793 | return NULL; | ||
794 | } | ||
795 | |||
796 | static struct event_constraint * | ||
797 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
798 | { | ||
799 | struct event_constraint *c; | ||
800 | |||
801 | c = intel_bts_constraints(event); | ||
802 | if (c) | ||
803 | return c; | ||
804 | |||
805 | c = intel_pebs_constraints(event); | ||
806 | if (c) | ||
807 | return c; | ||
808 | |||
809 | return x86_get_event_constraints(cpuc, event); | ||
810 | } | ||
811 | |||
812 | static int intel_pmu_hw_config(struct perf_event *event) | ||
813 | { | ||
814 | int ret = x86_pmu_hw_config(event); | ||
815 | |||
816 | if (ret) | ||
817 | return ret; | ||
818 | |||
819 | if (event->attr.type != PERF_TYPE_RAW) | ||
820 | return 0; | ||
821 | |||
822 | if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) | ||
823 | return 0; | ||
824 | |||
825 | if (x86_pmu.version < 3) | ||
826 | return -EINVAL; | ||
827 | |||
828 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
829 | return -EACCES; | ||
830 | |||
831 | event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; | ||
832 | |||
833 | return 0; | ||
834 | } | ||
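A hedged user-space illustration (not part of the patch) of the raw-event path accepted above: the AnyThread bit is assumed to be bit 21 of the event select, matching ARCH_PERFMON_EVENTSEL_ANY, and the kernel rejects it below perfmon v3 or, under perf paranoia, without CAP_SYS_ADMIN.

    /* Hedged sketch: request a raw event with the ANY (AnyThread) bit set. */
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <string.h>

    static int open_raw_anythread(unsigned long long evtsel)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size   = sizeof(attr);
            attr.type   = PERF_TYPE_RAW;
            attr.config = evtsel | (1ULL << 21);  /* assumed ARCH_PERFMON_EVENTSEL_ANY */

            /* pid 0, cpu -1: count the calling thread on any CPU */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }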
835 | |||
836 | static __initconst const struct x86_pmu core_pmu = { | ||
837 | .name = "core", | ||
838 | .handle_irq = x86_pmu_handle_irq, | ||
839 | .disable_all = x86_pmu_disable_all, | ||
840 | .enable_all = x86_pmu_enable_all, | ||
841 | .enable = x86_pmu_enable_event, | ||
842 | .disable = x86_pmu_disable_event, | ||
843 | .hw_config = x86_pmu_hw_config, | ||
844 | .schedule_events = x86_schedule_events, | ||
845 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
846 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
847 | .event_map = intel_pmu_event_map, | ||
848 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
849 | .apic = 1, | ||
850 | /* | ||
851 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
852 | * so we install an artificial 1<<31 period regardless of | ||
853 | * the generic event period: | ||
854 | */ | ||
855 | .max_period = (1ULL << 31) - 1, | ||
856 | .get_event_constraints = intel_get_event_constraints, | ||
857 | .event_constraints = intel_core_event_constraints, | ||
858 | }; | ||
859 | |||
860 | static void intel_pmu_cpu_starting(int cpu) | ||
861 | { | ||
862 | init_debug_store_on_cpu(cpu); | ||
863 | /* | ||
864 | * Deal with CPUs that don't clear their LBRs on power-up. | ||
865 | */ | ||
866 | intel_pmu_lbr_reset(); | ||
867 | } | ||
868 | |||
869 | static void intel_pmu_cpu_dying(int cpu) | ||
870 | { | ||
871 | fini_debug_store_on_cpu(cpu); | ||
872 | } | ||
873 | |||
874 | static __initconst const struct x86_pmu intel_pmu = { | ||
875 | .name = "Intel", | ||
876 | .handle_irq = intel_pmu_handle_irq, | ||
877 | .disable_all = intel_pmu_disable_all, | ||
878 | .enable_all = intel_pmu_enable_all, | ||
879 | .enable = intel_pmu_enable_event, | ||
880 | .disable = intel_pmu_disable_event, | ||
881 | .hw_config = intel_pmu_hw_config, | ||
882 | .schedule_events = x86_schedule_events, | ||
883 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
884 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
885 | .event_map = intel_pmu_event_map, | ||
886 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
887 | .apic = 1, | ||
888 | /* | ||
889 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
890 | * so we install an artificial 1<<31 period regardless of | ||
891 | * the generic event period: | ||
892 | */ | ||
893 | .max_period = (1ULL << 31) - 1, | ||
894 | .get_event_constraints = intel_get_event_constraints, | ||
895 | |||
896 | .cpu_starting = intel_pmu_cpu_starting, | ||
897 | .cpu_dying = intel_pmu_cpu_dying, | ||
898 | }; | ||
899 | |||
900 | static void intel_clovertown_quirks(void) | ||
901 | { | ||
902 | /* | ||
903 | * PEBS is unreliable due to: | ||
904 | * | ||
905 | * AJ67 - PEBS may experience CPL leaks | ||
906 | * AJ68 - PEBS PMI may be delayed by one event | ||
907 | * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] | ||
908 | * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS | ||
909 | * | ||
910 | * AJ67 could be worked around by restricting the OS/USR flags. | ||
911 | * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. | ||
912 | * | ||
913 | * AJ106 could possibly be worked around by not allowing LBR | ||
914 | * usage from PEBS, including the fixup. | ||
915 | * AJ68 could possibly be worked around by always programming | ||
916 | * a pebs_event_reset[0] value and coping with the lost events. | ||
917 | * | ||
918 | * But taken together it might just make sense to not enable PEBS on | ||
919 | * these chips. | ||
920 | */ | ||
921 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | ||
922 | x86_pmu.pebs = 0; | ||
923 | x86_pmu.pebs_constraints = NULL; | ||
924 | } | ||
925 | |||
926 | static __init int intel_pmu_init(void) | ||
927 | { | ||
928 | union cpuid10_edx edx; | ||
929 | union cpuid10_eax eax; | ||
930 | unsigned int unused; | ||
931 | unsigned int ebx; | ||
932 | int version; | ||
933 | |||
934 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
935 | switch (boot_cpu_data.x86) { | ||
936 | case 0x6: | ||
937 | return p6_pmu_init(); | ||
938 | case 0xf: | ||
939 | return p4_pmu_init(); | ||
940 | } | ||
941 | return -ENODEV; | ||
942 | } | ||
943 | |||
944 | /* | ||
945 | * Check whether the Architectural PerfMon supports | ||
946 | * Branch Misses Retired hw_event or not. | ||
947 | */ | ||
948 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
949 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
950 | return -ENODEV; | ||
951 | |||
952 | version = eax.split.version_id; | ||
953 | if (version < 2) | ||
954 | x86_pmu = core_pmu; | ||
955 | else | ||
956 | x86_pmu = intel_pmu; | ||
957 | |||
958 | x86_pmu.version = version; | ||
959 | x86_pmu.num_counters = eax.split.num_counters; | ||
960 | x86_pmu.cntval_bits = eax.split.bit_width; | ||
961 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; | ||
962 | |||
963 | /* | ||
964 | * Quirk: v2 perfmon does not report fixed-purpose events, so | ||
965 | * assume at least 3 events: | ||
966 | */ | ||
967 | if (version > 1) | ||
968 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); | ||
969 | |||
970 | /* | ||
971 | * v2 and above have a perf capabilities MSR | ||
972 | */ | ||
973 | if (version > 1) { | ||
974 | u64 capabilities; | ||
975 | |||
976 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); | ||
977 | x86_pmu.intel_cap.capabilities = capabilities; | ||
978 | } | ||
979 | |||
980 | intel_ds_init(); | ||
981 | |||
982 | /* | ||
983 | * Install the hw-cache-events table: | ||
984 | */ | ||
985 | switch (boot_cpu_data.x86_model) { | ||
986 | case 14: /* 65 nm core solo/duo, "Yonah" */ | ||
987 | pr_cont("Core events, "); | ||
988 | break; | ||
989 | |||
990 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
991 | x86_pmu.quirks = intel_clovertown_quirks; | ||
992 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
993 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
994 | case 29: /* six-core 45 nm xeon "Dunnington" */ | ||
995 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | ||
996 | sizeof(hw_cache_event_ids)); | ||
997 | |||
998 | intel_pmu_lbr_init_core(); | ||
999 | |||
1000 | x86_pmu.event_constraints = intel_core2_event_constraints; | ||
1001 | pr_cont("Core2 events, "); | ||
1002 | break; | ||
1003 | |||
1004 | case 26: /* 45 nm nehalem, "Bloomfield" */ | ||
1005 | case 30: /* 45 nm nehalem, "Lynnfield" */ | ||
1006 | case 46: /* 45 nm nehalem-ex, "Beckton" */ | ||
1007 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | ||
1008 | sizeof(hw_cache_event_ids)); | ||
1009 | |||
1010 | intel_pmu_lbr_init_nhm(); | ||
1011 | |||
1012 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | ||
1013 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | ||
1014 | pr_cont("Nehalem events, "); | ||
1015 | break; | ||
1016 | |||
1017 | case 28: /* Atom */ | ||
1018 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
1019 | sizeof(hw_cache_event_ids)); | ||
1020 | |||
1021 | intel_pmu_lbr_init_atom(); | ||
1022 | |||
1023 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
1024 | pr_cont("Atom events, "); | ||
1025 | break; | ||
1026 | |||
1027 | case 37: /* 32 nm nehalem, "Clarkdale" */ | ||
1028 | case 44: /* 32 nm nehalem, "Gulftown" */ | ||
1029 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | ||
1030 | sizeof(hw_cache_event_ids)); | ||
1031 | |||
1032 | intel_pmu_lbr_init_nhm(); | ||
1033 | |||
1034 | x86_pmu.event_constraints = intel_westmere_event_constraints; | ||
1035 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | ||
1036 | pr_cont("Westmere events, "); | ||
1037 | break; | ||
1038 | |||
1039 | default: | ||
1040 | /* | ||
1041 | * default constraints for v2 and up | ||
1042 | */ | ||
1043 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
1044 | pr_cont("generic architected perfmon, "); | ||
1045 | } | ||
1046 | return 0; | ||
1047 | } | ||
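For reference, a sketch of the CPUID leaf 0xA decode that the cpuid10_eax/cpuid10_edx unions perform above; the bit positions follow the architectural definition (version in EAX[7:0], counter count in EAX[15:8], counter width in EAX[23:16], event-mask length in EAX[31:24], fixed counter count in EDX[4:0]).

    /* Hedged sketch: manual decode equivalent to the union bitfields. */
    static void sketch_decode_cpuid10(u32 eax, u32 edx)
    {
            unsigned int version  = eax & 0xff;          /* eax.split.version_id */
            unsigned int counters = (eax >> 8) & 0xff;   /* eax.split.num_counters */
            unsigned int width    = (eax >> 16) & 0xff;  /* eax.split.bit_width */
            unsigned int masklen  = (eax >> 24) & 0xff;  /* eax.split.mask_length */
            unsigned int fixed    = edx & 0x1f;          /* edx.split.num_counters_fixed */

            pr_info("perfmon v%u: %u x %u-bit counters, %u fixed, mask length %u\n",
                    version, counters, width, fixed, masklen);
    }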
1048 | |||
1049 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
1050 | |||
1051 | static int intel_pmu_init(void) | ||
1052 | { | ||
1053 | return 0; | ||
1054 | } | ||
1055 | |||
1056 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c new file mode 100644 index 000000000000..18018d1311cd --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -0,0 +1,641 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* The maximal number of PEBS events: */ | ||
4 | #define MAX_PEBS_EVENTS 4 | ||
5 | |||
6 | /* The size of a BTS record in bytes: */ | ||
7 | #define BTS_RECORD_SIZE 24 | ||
8 | |||
9 | #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) | ||
10 | #define PEBS_BUFFER_SIZE PAGE_SIZE | ||
11 | |||
12 | /* | ||
13 | * pebs_record_32 for p4 and core is not supported | ||
14 | |||
15 | struct pebs_record_32 { | ||
16 | u32 flags, ip; | ||
17 | u32 ax, bx, cx, dx; | ||
18 | u32 si, di, bp, sp; | ||
19 | }; | ||
20 | |||
21 | */ | ||
22 | |||
23 | struct pebs_record_core { | ||
24 | u64 flags, ip; | ||
25 | u64 ax, bx, cx, dx; | ||
26 | u64 si, di, bp, sp; | ||
27 | u64 r8, r9, r10, r11; | ||
28 | u64 r12, r13, r14, r15; | ||
29 | }; | ||
30 | |||
31 | struct pebs_record_nhm { | ||
32 | u64 flags, ip; | ||
33 | u64 ax, bx, cx, dx; | ||
34 | u64 si, di, bp, sp; | ||
35 | u64 r8, r9, r10, r11; | ||
36 | u64 r12, r13, r14, r15; | ||
37 | u64 status, dla, dse, lat; | ||
38 | }; | ||
39 | |||
40 | /* | ||
41 | * A debug store configuration. | ||
42 | * | ||
43 | * We only support architectures that use 64bit fields. | ||
44 | */ | ||
45 | struct debug_store { | ||
46 | u64 bts_buffer_base; | ||
47 | u64 bts_index; | ||
48 | u64 bts_absolute_maximum; | ||
49 | u64 bts_interrupt_threshold; | ||
50 | u64 pebs_buffer_base; | ||
51 | u64 pebs_index; | ||
52 | u64 pebs_absolute_maximum; | ||
53 | u64 pebs_interrupt_threshold; | ||
54 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
55 | }; | ||
56 | |||
57 | static void init_debug_store_on_cpu(int cpu) | ||
58 | { | ||
59 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
60 | |||
61 | if (!ds) | ||
62 | return; | ||
63 | |||
64 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
65 | (u32)((u64)(unsigned long)ds), | ||
66 | (u32)((u64)(unsigned long)ds >> 32)); | ||
67 | } | ||
68 | |||
69 | static void fini_debug_store_on_cpu(int cpu) | ||
70 | { | ||
71 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
72 | return; | ||
73 | |||
74 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
75 | } | ||
76 | |||
77 | static void release_ds_buffers(void) | ||
78 | { | ||
79 | int cpu; | ||
80 | |||
81 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
82 | return; | ||
83 | |||
84 | get_online_cpus(); | ||
85 | |||
86 | for_each_online_cpu(cpu) | ||
87 | fini_debug_store_on_cpu(cpu); | ||
88 | |||
89 | for_each_possible_cpu(cpu) { | ||
90 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
91 | |||
92 | if (!ds) | ||
93 | continue; | ||
94 | |||
95 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
96 | |||
97 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
98 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
99 | kfree(ds); | ||
100 | } | ||
101 | |||
102 | put_online_cpus(); | ||
103 | } | ||
104 | |||
105 | static int reserve_ds_buffers(void) | ||
106 | { | ||
107 | int cpu, err = 0; | ||
108 | |||
109 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
110 | return 0; | ||
111 | |||
112 | get_online_cpus(); | ||
113 | |||
114 | for_each_possible_cpu(cpu) { | ||
115 | struct debug_store *ds; | ||
116 | void *buffer; | ||
117 | int max, thresh; | ||
118 | |||
119 | err = -ENOMEM; | ||
120 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
121 | if (unlikely(!ds)) | ||
122 | break; | ||
123 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
124 | |||
125 | if (x86_pmu.bts) { | ||
126 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
127 | if (unlikely(!buffer)) | ||
128 | break; | ||
129 | |||
130 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
131 | thresh = max / 16; | ||
132 | |||
133 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
134 | ds->bts_index = ds->bts_buffer_base; | ||
135 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
136 | max * BTS_RECORD_SIZE; | ||
137 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
138 | thresh * BTS_RECORD_SIZE; | ||
139 | } | ||
140 | |||
141 | if (x86_pmu.pebs) { | ||
142 | buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); | ||
143 | if (unlikely(!buffer)) | ||
144 | break; | ||
145 | |||
146 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
147 | |||
148 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
149 | ds->pebs_index = ds->pebs_buffer_base; | ||
150 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
151 | max * x86_pmu.pebs_record_size; | ||
152 | /* | ||
153 | * Always use single record PEBS | ||
154 | */ | ||
155 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
156 | x86_pmu.pebs_record_size; | ||
157 | } | ||
158 | |||
159 | err = 0; | ||
160 | } | ||
161 | |||
162 | if (err) | ||
163 | release_ds_buffers(); | ||
164 | else { | ||
165 | for_each_online_cpu(cpu) | ||
166 | init_debug_store_on_cpu(cpu); | ||
167 | } | ||
168 | |||
169 | put_online_cpus(); | ||
170 | |||
171 | return err; | ||
172 | } | ||
173 | |||
174 | /* | ||
175 | * BTS | ||
176 | */ | ||
177 | |||
178 | static struct event_constraint bts_constraint = | ||
179 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
180 | |||
181 | static void intel_pmu_enable_bts(u64 config) | ||
182 | { | ||
183 | unsigned long debugctlmsr; | ||
184 | |||
185 | debugctlmsr = get_debugctlmsr(); | ||
186 | |||
187 | debugctlmsr |= DEBUGCTLMSR_TR; | ||
188 | debugctlmsr |= DEBUGCTLMSR_BTS; | ||
189 | debugctlmsr |= DEBUGCTLMSR_BTINT; | ||
190 | |||
191 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
192 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS; | ||
193 | |||
194 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
195 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR; | ||
196 | |||
197 | update_debugctlmsr(debugctlmsr); | ||
198 | } | ||
199 | |||
200 | static void intel_pmu_disable_bts(void) | ||
201 | { | ||
202 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
203 | unsigned long debugctlmsr; | ||
204 | |||
205 | if (!cpuc->ds) | ||
206 | return; | ||
207 | |||
208 | debugctlmsr = get_debugctlmsr(); | ||
209 | |||
210 | debugctlmsr &= | ||
211 | ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT | | ||
212 | DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR); | ||
213 | |||
214 | update_debugctlmsr(debugctlmsr); | ||
215 | } | ||
216 | |||
217 | static void intel_pmu_drain_bts_buffer(void) | ||
218 | { | ||
219 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
220 | struct debug_store *ds = cpuc->ds; | ||
221 | struct bts_record { | ||
222 | u64 from; | ||
223 | u64 to; | ||
224 | u64 flags; | ||
225 | }; | ||
226 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
227 | struct bts_record *at, *top; | ||
228 | struct perf_output_handle handle; | ||
229 | struct perf_event_header header; | ||
230 | struct perf_sample_data data; | ||
231 | struct pt_regs regs; | ||
232 | |||
233 | if (!event) | ||
234 | return; | ||
235 | |||
236 | if (!ds) | ||
237 | return; | ||
238 | |||
239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
240 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
241 | |||
242 | if (top <= at) | ||
243 | return; | ||
244 | |||
245 | ds->bts_index = ds->bts_buffer_base; | ||
246 | |||
247 | perf_sample_data_init(&data, 0); | ||
248 | data.period = event->hw.last_period; | ||
249 | regs.ip = 0; | ||
250 | |||
251 | /* | ||
252 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
253 | * We will overwrite the from and to address before we output | ||
254 | * the sample. | ||
255 | */ | ||
256 | perf_prepare_sample(&header, &data, event, ®s); | ||
257 | |||
258 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | ||
259 | return; | ||
260 | |||
261 | for (; at < top; at++) { | ||
262 | data.ip = at->from; | ||
263 | data.addr = at->to; | ||
264 | |||
265 | perf_output_sample(&handle, &header, &data, event); | ||
266 | } | ||
267 | |||
268 | perf_output_end(&handle); | ||
269 | |||
270 | /* There's new data available. */ | ||
271 | event->hw.interrupts++; | ||
272 | event->pending_kill = POLL_IN; | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * PEBS | ||
277 | */ | ||
278 | |||
279 | static struct event_constraint intel_core_pebs_events[] = { | ||
280 | PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ | ||
281 | PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ | ||
282 | PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | ||
283 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */ | ||
284 | PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
285 | PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
286 | PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
287 | PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
288 | PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
289 | EVENT_CONSTRAINT_END | ||
290 | }; | ||
291 | |||
292 | static struct event_constraint intel_nehalem_pebs_events[] = { | ||
293 | PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ | ||
294 | PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ | ||
295 | PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ | ||
296 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */ | ||
297 | PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
298 | PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
299 | PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
300 | PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
301 | PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
302 | EVENT_CONSTRAINT_END | ||
303 | }; | ||
304 | |||
305 | static struct event_constraint * | ||
306 | intel_pebs_constraints(struct perf_event *event) | ||
307 | { | ||
308 | struct event_constraint *c; | ||
309 | |||
310 | if (!event->attr.precise_ip) | ||
311 | return NULL; | ||
312 | |||
313 | if (x86_pmu.pebs_constraints) { | ||
314 | for_each_event_constraint(c, x86_pmu.pebs_constraints) { | ||
315 | if ((event->hw.config & c->cmask) == c->code) | ||
316 | return c; | ||
317 | } | ||
318 | } | ||
319 | |||
320 | return &emptyconstraint; | ||
321 | } | ||
322 | |||
323 | static void intel_pmu_pebs_enable(struct perf_event *event) | ||
324 | { | ||
325 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
326 | struct hw_perf_event *hwc = &event->hw; | ||
327 | |||
328 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | ||
329 | |||
330 | cpuc->pebs_enabled |= 1ULL << hwc->idx; | ||
331 | WARN_ON_ONCE(cpuc->enabled); | ||
332 | |||
333 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
334 | intel_pmu_lbr_enable(event); | ||
335 | } | ||
336 | |||
337 | static void intel_pmu_pebs_disable(struct perf_event *event) | ||
338 | { | ||
339 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
340 | struct hw_perf_event *hwc = &event->hw; | ||
341 | |||
342 | cpuc->pebs_enabled &= ~(1ULL << hwc->idx); | ||
343 | if (cpuc->enabled) | ||
344 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
345 | |||
346 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; | ||
347 | |||
348 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
349 | intel_pmu_lbr_disable(event); | ||
350 | } | ||
351 | |||
352 | static void intel_pmu_pebs_enable_all(void) | ||
353 | { | ||
354 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
355 | |||
356 | if (cpuc->pebs_enabled) | ||
357 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
358 | } | ||
359 | |||
360 | static void intel_pmu_pebs_disable_all(void) | ||
361 | { | ||
362 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
363 | |||
364 | if (cpuc->pebs_enabled) | ||
365 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | ||
366 | } | ||
367 | |||
368 | #include <asm/insn.h> | ||
369 | |||
370 | static inline bool kernel_ip(unsigned long ip) | ||
371 | { | ||
372 | #ifdef CONFIG_X86_32 | ||
373 | return ip > PAGE_OFFSET; | ||
374 | #else | ||
375 | return (long)ip < 0; | ||
376 | #endif | ||
377 | } | ||
378 | |||
379 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | ||
380 | { | ||
381 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
382 | unsigned long from = cpuc->lbr_entries[0].from; | ||
383 | unsigned long old_to, to = cpuc->lbr_entries[0].to; | ||
384 | unsigned long ip = regs->ip; | ||
385 | |||
386 | /* | ||
387 | * We don't need to fix up if the PEBS assist is fault-like | ||
388 | */ | ||
389 | if (!x86_pmu.intel_cap.pebs_trap) | ||
390 | return 1; | ||
391 | |||
392 | /* | ||
393 | * No LBR entry, no basic block, no rewinding | ||
394 | */ | ||
395 | if (!cpuc->lbr_stack.nr || !from || !to) | ||
396 | return 0; | ||
397 | |||
398 | /* | ||
399 | * Basic blocks should never cross user/kernel boundaries | ||
400 | */ | ||
401 | if (kernel_ip(ip) != kernel_ip(to)) | ||
402 | return 0; | ||
403 | |||
404 | /* | ||
405 | * unsigned math, either ip is before the start (impossible) or | ||
406 | * the basic block is larger than 1 page (sanity) | ||
407 | */ | ||
408 | if ((ip - to) > PAGE_SIZE) | ||
409 | return 0; | ||
410 | |||
411 | /* | ||
412 | * We sampled a branch insn, rewind using the LBR stack | ||
413 | */ | ||
414 | if (ip == to) { | ||
415 | regs->ip = from; | ||
416 | return 1; | ||
417 | } | ||
418 | |||
419 | do { | ||
420 | struct insn insn; | ||
421 | u8 buf[MAX_INSN_SIZE]; | ||
422 | void *kaddr; | ||
423 | |||
424 | old_to = to; | ||
425 | if (!kernel_ip(ip)) { | ||
426 | int bytes, size = MAX_INSN_SIZE; | ||
427 | |||
428 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); | ||
429 | if (bytes != size) | ||
430 | return 0; | ||
431 | |||
432 | kaddr = buf; | ||
433 | } else | ||
434 | kaddr = (void *)to; | ||
435 | |||
436 | kernel_insn_init(&insn, kaddr); | ||
437 | insn_get_length(&insn); | ||
438 | to += insn.length; | ||
439 | } while (to < ip); | ||
440 | |||
441 | if (to == ip) { | ||
442 | regs->ip = old_to; | ||
443 | return 1; | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | * Even though we decoded the basic block, the instruction stream | ||
448 | * never matched the given IP; either the TO or the IP got corrupted. | ||
449 | */ | ||
450 | return 0; | ||
451 | } | ||
452 | |||
453 | static int intel_pmu_save_and_restart(struct perf_event *event); | ||
454 | |||
455 | static void __intel_pmu_pebs_event(struct perf_event *event, | ||
456 | struct pt_regs *iregs, void *__pebs) | ||
457 | { | ||
458 | /* | ||
459 | * We cast to pebs_record_core since that is a subset of | ||
460 | * both formats and we don't use the other fields in this | ||
461 | * routine. | ||
462 | */ | ||
463 | struct pebs_record_core *pebs = __pebs; | ||
464 | struct perf_sample_data data; | ||
465 | struct pt_regs regs; | ||
466 | |||
467 | if (!intel_pmu_save_and_restart(event)) | ||
468 | return; | ||
469 | |||
470 | perf_sample_data_init(&data, 0); | ||
471 | data.period = event->hw.last_period; | ||
472 | |||
473 | /* | ||
474 | * We use the interrupt regs as a base because the PEBS record | ||
475 | * does not contain a full regs set, specifically it seems to | ||
476 | * lack segment descriptors, which get used by things like | ||
477 | * user_mode(). | ||
478 | * | ||
479 | * In the simple case, fix up only the IP and the BP/SP regs so that | ||
480 | * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN function properly. | ||
481 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. | ||
482 | */ | ||
483 | regs = *iregs; | ||
484 | regs.ip = pebs->ip; | ||
485 | regs.bp = pebs->bp; | ||
486 | regs.sp = pebs->sp; | ||
487 | |||
488 | if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) | ||
489 | regs.flags |= PERF_EFLAGS_EXACT; | ||
490 | else | ||
491 | regs.flags &= ~PERF_EFLAGS_EXACT; | ||
492 | |||
493 | if (perf_event_overflow(event, 1, &data, ®s)) | ||
494 | x86_pmu_stop(event); | ||
495 | } | ||
496 | |||
497 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | ||
498 | { | ||
499 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
500 | struct debug_store *ds = cpuc->ds; | ||
501 | struct perf_event *event = cpuc->events[0]; /* PMC0 only */ | ||
502 | struct pebs_record_core *at, *top; | ||
503 | int n; | ||
504 | |||
505 | if (!ds || !x86_pmu.pebs) | ||
506 | return; | ||
507 | |||
508 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; | ||
509 | top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; | ||
510 | |||
511 | /* | ||
512 | * Whatever else happens, drain the thing | ||
513 | */ | ||
514 | ds->pebs_index = ds->pebs_buffer_base; | ||
515 | |||
516 | if (!test_bit(0, cpuc->active_mask)) | ||
517 | return; | ||
518 | |||
519 | WARN_ON_ONCE(!event); | ||
520 | |||
521 | if (!event->attr.precise_ip) | ||
522 | return; | ||
523 | |||
524 | n = top - at; | ||
525 | if (n <= 0) | ||
526 | return; | ||
527 | |||
528 | /* | ||
529 | * This should not happen; we program the threshold at 1 and do not | ||
530 | * set a reset value. | ||
531 | */ | ||
532 | WARN_ON_ONCE(n > 1); | ||
533 | at += n - 1; | ||
534 | |||
535 | __intel_pmu_pebs_event(event, iregs, at); | ||
536 | } | ||
537 | |||
538 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | ||
539 | { | ||
540 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
541 | struct debug_store *ds = cpuc->ds; | ||
542 | struct pebs_record_nhm *at, *top; | ||
543 | struct perf_event *event = NULL; | ||
544 | u64 status = 0; | ||
545 | int bit, n; | ||
546 | |||
547 | if (!ds || !x86_pmu.pebs) | ||
548 | return; | ||
549 | |||
550 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
551 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
552 | |||
553 | ds->pebs_index = ds->pebs_buffer_base; | ||
554 | |||
555 | n = top - at; | ||
556 | if (n <= 0) | ||
557 | return; | ||
558 | |||
559 | /* | ||
560 | * This should not happen; we program the threshold at 1 and do not | ||
561 | * set a reset value. | ||
562 | */ | ||
563 | WARN_ON_ONCE(n > MAX_PEBS_EVENTS); | ||
564 | |||
565 | for ( ; at < top; at++) { | ||
566 | for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { | ||
567 | event = cpuc->events[bit]; | ||
568 | if (!test_bit(bit, cpuc->active_mask)) | ||
569 | continue; | ||
570 | |||
571 | WARN_ON_ONCE(!event); | ||
572 | |||
573 | if (!event->attr.precise_ip) | ||
574 | continue; | ||
575 | |||
576 | if (__test_and_set_bit(bit, (unsigned long *)&status)) | ||
577 | continue; | ||
578 | |||
579 | break; | ||
580 | } | ||
581 | |||
582 | if (!event || bit >= MAX_PEBS_EVENTS) | ||
583 | continue; | ||
584 | |||
585 | __intel_pmu_pebs_event(event, iregs, at); | ||
586 | } | ||
587 | } | ||
588 | |||
589 | /* | ||
590 | * BTS, PEBS probe and setup | ||
591 | */ | ||
592 | |||
593 | static void intel_ds_init(void) | ||
594 | { | ||
595 | /* | ||
596 | * No support for 32bit formats | ||
597 | */ | ||
598 | if (!boot_cpu_has(X86_FEATURE_DTES64)) | ||
599 | return; | ||
600 | |||
601 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); | ||
602 | x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); | ||
603 | if (x86_pmu.pebs) { | ||
604 | char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; | ||
605 | int format = x86_pmu.intel_cap.pebs_format; | ||
606 | |||
607 | switch (format) { | ||
608 | case 0: | ||
609 | printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); | ||
610 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); | ||
611 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; | ||
612 | x86_pmu.pebs_constraints = intel_core_pebs_events; | ||
613 | break; | ||
614 | |||
615 | case 1: | ||
616 | printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); | ||
617 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); | ||
618 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; | ||
619 | x86_pmu.pebs_constraints = intel_nehalem_pebs_events; | ||
620 | break; | ||
621 | |||
622 | default: | ||
623 | printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); | ||
624 | x86_pmu.pebs = 0; | ||
625 | break; | ||
626 | } | ||
627 | } | ||
628 | } | ||
629 | |||
630 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
631 | |||
632 | static int reserve_ds_buffers(void) | ||
633 | { | ||
634 | return 0; | ||
635 | } | ||
636 | |||
637 | static void release_ds_buffers(void) | ||
638 | { | ||
639 | } | ||
640 | |||
641 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c new file mode 100644 index 000000000000..d202c1bece1a --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -0,0 +1,218 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | enum { | ||
4 | LBR_FORMAT_32 = 0x00, | ||
5 | LBR_FORMAT_LIP = 0x01, | ||
6 | LBR_FORMAT_EIP = 0x02, | ||
7 | LBR_FORMAT_EIP_FLAGS = 0x03, | ||
8 | }; | ||
9 | |||
10 | /* | ||
11 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI; | ||
12 | * otherwise it becomes nearly impossible to get a reliable stack. | ||
13 | */ | ||
14 | |||
15 | static void __intel_pmu_lbr_enable(void) | ||
16 | { | ||
17 | u64 debugctl; | ||
18 | |||
19 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
20 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
21 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
22 | } | ||
23 | |||
24 | static void __intel_pmu_lbr_disable(void) | ||
25 | { | ||
26 | u64 debugctl; | ||
27 | |||
28 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
29 | debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
30 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
31 | } | ||
32 | |||
33 | static void intel_pmu_lbr_reset_32(void) | ||
34 | { | ||
35 | int i; | ||
36 | |||
37 | for (i = 0; i < x86_pmu.lbr_nr; i++) | ||
38 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
39 | } | ||
40 | |||
41 | static void intel_pmu_lbr_reset_64(void) | ||
42 | { | ||
43 | int i; | ||
44 | |||
45 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
46 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
47 | wrmsrl(x86_pmu.lbr_to + i, 0); | ||
48 | } | ||
49 | } | ||
50 | |||
51 | static void intel_pmu_lbr_reset(void) | ||
52 | { | ||
53 | if (!x86_pmu.lbr_nr) | ||
54 | return; | ||
55 | |||
56 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
57 | intel_pmu_lbr_reset_32(); | ||
58 | else | ||
59 | intel_pmu_lbr_reset_64(); | ||
60 | } | ||
61 | |||
62 | static void intel_pmu_lbr_enable(struct perf_event *event) | ||
63 | { | ||
64 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
65 | |||
66 | if (!x86_pmu.lbr_nr) | ||
67 | return; | ||
68 | |||
69 | WARN_ON_ONCE(cpuc->enabled); | ||
70 | |||
71 | /* | ||
72 | * Reset the LBR stack if we changed task context to | ||
73 | * avoid data leaks. | ||
74 | */ | ||
75 | |||
76 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { | ||
77 | intel_pmu_lbr_reset(); | ||
78 | cpuc->lbr_context = event->ctx; | ||
79 | } | ||
80 | |||
81 | cpuc->lbr_users++; | ||
82 | } | ||
83 | |||
84 | static void intel_pmu_lbr_disable(struct perf_event *event) | ||
85 | { | ||
86 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
87 | |||
88 | if (!x86_pmu.lbr_nr) | ||
89 | return; | ||
90 | |||
91 | cpuc->lbr_users--; | ||
92 | WARN_ON_ONCE(cpuc->lbr_users < 0); | ||
93 | |||
94 | if (cpuc->enabled && !cpuc->lbr_users) | ||
95 | __intel_pmu_lbr_disable(); | ||
96 | } | ||
97 | |||
98 | static void intel_pmu_lbr_enable_all(void) | ||
99 | { | ||
100 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
101 | |||
102 | if (cpuc->lbr_users) | ||
103 | __intel_pmu_lbr_enable(); | ||
104 | } | ||
105 | |||
106 | static void intel_pmu_lbr_disable_all(void) | ||
107 | { | ||
108 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
109 | |||
110 | if (cpuc->lbr_users) | ||
111 | __intel_pmu_lbr_disable(); | ||
112 | } | ||
113 | |||
114 | static inline u64 intel_pmu_lbr_tos(void) | ||
115 | { | ||
116 | u64 tos; | ||
117 | |||
118 | rdmsrl(x86_pmu.lbr_tos, tos); | ||
119 | |||
120 | return tos; | ||
121 | } | ||
122 | |||
123 | static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | ||
124 | { | ||
125 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
126 | u64 tos = intel_pmu_lbr_tos(); | ||
127 | int i; | ||
128 | |||
129 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
130 | unsigned long lbr_idx = (tos - i) & mask; | ||
131 | union { | ||
132 | struct { | ||
133 | u32 from; | ||
134 | u32 to; | ||
135 | }; | ||
136 | u64 lbr; | ||
137 | } msr_lastbranch; | ||
138 | |||
139 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); | ||
140 | |||
141 | cpuc->lbr_entries[i].from = msr_lastbranch.from; | ||
142 | cpuc->lbr_entries[i].to = msr_lastbranch.to; | ||
143 | cpuc->lbr_entries[i].flags = 0; | ||
144 | } | ||
145 | cpuc->lbr_stack.nr = i; | ||
146 | } | ||
147 | |||
148 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
149 | |||
150 | /* | ||
151 | * Due to lack of segmentation in Linux the effective address (offset) | ||
152 | * is the same as the linear address, allowing us to merge the LIP and EIP | ||
153 | * LBR formats. | ||
154 | */ | ||
155 | static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | ||
156 | { | ||
157 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
158 | int lbr_format = x86_pmu.intel_cap.lbr_format; | ||
159 | u64 tos = intel_pmu_lbr_tos(); | ||
160 | int i; | ||
161 | |||
162 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
163 | unsigned long lbr_idx = (tos - i) & mask; | ||
164 | u64 from, to, flags = 0; | ||
165 | |||
166 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | ||
167 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); | ||
168 | |||
169 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { | ||
170 | flags = !!(from & LBR_FROM_FLAG_MISPRED); | ||
171 | from = (u64)((((s64)from) << 1) >> 1); | ||
172 | } | ||
173 | |||
174 | cpuc->lbr_entries[i].from = from; | ||
175 | cpuc->lbr_entries[i].to = to; | ||
176 | cpuc->lbr_entries[i].flags = flags; | ||
177 | } | ||
178 | cpuc->lbr_stack.nr = i; | ||
179 | } | ||
180 | |||
181 | static void intel_pmu_lbr_read(void) | ||
182 | { | ||
183 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
184 | |||
185 | if (!cpuc->lbr_users) | ||
186 | return; | ||
187 | |||
188 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
189 | intel_pmu_lbr_read_32(cpuc); | ||
190 | else | ||
191 | intel_pmu_lbr_read_64(cpuc); | ||
192 | } | ||
193 | |||
194 | static void intel_pmu_lbr_init_core(void) | ||
195 | { | ||
196 | x86_pmu.lbr_nr = 4; | ||
197 | x86_pmu.lbr_tos = 0x01c9; | ||
198 | x86_pmu.lbr_from = 0x40; | ||
199 | x86_pmu.lbr_to = 0x60; | ||
200 | } | ||
201 | |||
202 | static void intel_pmu_lbr_init_nhm(void) | ||
203 | { | ||
204 | x86_pmu.lbr_nr = 16; | ||
205 | x86_pmu.lbr_tos = 0x01c9; | ||
206 | x86_pmu.lbr_from = 0x680; | ||
207 | x86_pmu.lbr_to = 0x6c0; | ||
208 | } | ||
209 | |||
210 | static void intel_pmu_lbr_init_atom(void) | ||
211 | { | ||
212 | x86_pmu.lbr_nr = 8; | ||
213 | x86_pmu.lbr_tos = 0x01c9; | ||
214 | x86_pmu.lbr_from = 0x40; | ||
215 | x86_pmu.lbr_to = 0x60; | ||
216 | } | ||
217 | |||
218 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
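Editorial note (not part of the patch): for the LBR_FORMAT_EIP_FLAGS case handled in intel_pmu_lbr_read_64() above, bit 63 of the FROM value carries the mispredict flag and the branch-from address is recovered by sign extension. A minimal sketch of that single decode step:

/* Split a raw LBR_FROM value into a flag and a sign-extended address. */
static inline void lbr_decode_from(u64 raw, u64 *from, int *mispred)
{
	*mispred = !!(raw & (1ULL << 63));		/* LBR_FROM_FLAG_MISPRED */
	*from    = (u64)(((s64)raw << 1) >> 1);		/* drop bit 63, sign-extend */
}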
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c new file mode 100644 index 000000000000..249015173992 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -0,0 +1,951 @@ | |||
1 | /* | ||
2 | * Netburst Performance Events (P4, old Xeon) | ||
3 | * | ||
4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> | ||
5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> | ||
6 | * | ||
7 | * For licencing details see kernel-base/COPYING | ||
8 | */ | ||
9 | |||
10 | #ifdef CONFIG_CPU_SUP_INTEL | ||
11 | |||
12 | #include <asm/perf_event_p4.h> | ||
13 | |||
14 | #define P4_CNTR_LIMIT 3 | ||
15 | /* | ||
16 | * array indices: 0,1 - HT threads, used on an HT-enabled cpu | ||
17 | */ | ||
18 | struct p4_event_bind { | ||
19 | unsigned int opcode; /* Event code and ESCR selector */ | ||
20 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | ||
21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ | ||
22 | }; | ||
23 | |||
24 | struct p4_pebs_bind { | ||
25 | unsigned int metric_pebs; | ||
26 | unsigned int metric_vert; | ||
27 | }; | ||
28 | |||
29 | /* it sets P4_PEBS_ENABLE_UOP_TAG as well */ | ||
30 | #define P4_GEN_PEBS_BIND(name, pebs, vert) \ | ||
31 | [P4_PEBS_METRIC__##name] = { \ | ||
32 | .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \ | ||
33 | .metric_vert = vert, \ | ||
34 | } | ||
35 | |||
36 | /* | ||
37 | * note we have P4_PEBS_ENABLE_UOP_TAG always set here | ||
38 | * | ||
39 | * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of | ||
40 | * event configuration to find out which values are to be | ||
41 | * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT | ||
42 | * registers | ||
43 | */ | ||
44 | static struct p4_pebs_bind p4_pebs_bind_map[] = { | ||
45 | P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001), | ||
46 | P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001), | ||
47 | P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001), | ||
48 | P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002), | ||
49 | P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003), | ||
50 | P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010), | ||
51 | P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001), | ||
52 | P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001), | ||
53 | P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002), | ||
54 | }; | ||
55 | |||
56 | /* | ||
57 | * Note that we don't use CCCR1 here, there is an | ||
58 | * exception for P4_BSQ_ALLOCATION but we just have | ||
59 | * no workaround | ||
60 | * | ||
61 | * consider this binding as the resources a particular | ||
62 | * event may borrow; it doesn't contain EventMask, | ||
63 | * Tags and friends -- they are left to the caller | ||
64 | */ | ||
65 | static struct p4_event_bind p4_event_bind_map[] = { | ||
66 | [P4_EVENT_TC_DELIVER_MODE] = { | ||
67 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), | ||
68 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
69 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
70 | }, | ||
71 | [P4_EVENT_BPU_FETCH_REQUEST] = { | ||
72 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), | ||
73 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, | ||
74 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
75 | }, | ||
76 | [P4_EVENT_ITLB_REFERENCE] = { | ||
77 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), | ||
78 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | ||
79 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
80 | }, | ||
81 | [P4_EVENT_MEMORY_CANCEL] = { | ||
82 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), | ||
83 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
84 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
85 | }, | ||
86 | [P4_EVENT_MEMORY_COMPLETE] = { | ||
87 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), | ||
88 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | ||
89 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
90 | }, | ||
91 | [P4_EVENT_LOAD_PORT_REPLAY] = { | ||
92 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), | ||
93 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | ||
94 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
95 | }, | ||
96 | [P4_EVENT_STORE_PORT_REPLAY] = { | ||
97 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), | ||
98 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | ||
99 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
100 | }, | ||
101 | [P4_EVENT_MOB_LOAD_REPLAY] = { | ||
102 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), | ||
103 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, | ||
104 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
105 | }, | ||
106 | [P4_EVENT_PAGE_WALK_TYPE] = { | ||
107 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), | ||
108 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, | ||
109 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
110 | }, | ||
111 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { | ||
112 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), | ||
113 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | ||
114 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
115 | }, | ||
116 | [P4_EVENT_IOQ_ALLOCATION] = { | ||
117 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), | ||
118 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
119 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
120 | }, | ||
121 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
122 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), | ||
123 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, | ||
124 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
125 | }, | ||
126 | [P4_EVENT_FSB_DATA_ACTIVITY] = { | ||
127 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), | ||
128 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
129 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
130 | }, | ||
131 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ | ||
132 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), | ||
133 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, | ||
134 | .cntr = { {0, -1, -1}, {1, -1, -1} }, | ||
135 | }, | ||
136 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
137 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), | ||
138 | .escr_msr = { MSR_P4_BSU_ESCR1, MSR_P4_BSU_ESCR1 }, | ||
139 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
140 | }, | ||
141 | [P4_EVENT_SSE_INPUT_ASSIST] = { | ||
142 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), | ||
143 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
144 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
145 | }, | ||
146 | [P4_EVENT_PACKED_SP_UOP] = { | ||
147 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), | ||
148 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
149 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
150 | }, | ||
151 | [P4_EVENT_PACKED_DP_UOP] = { | ||
152 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), | ||
153 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
154 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
155 | }, | ||
156 | [P4_EVENT_SCALAR_SP_UOP] = { | ||
157 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), | ||
158 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
159 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
160 | }, | ||
161 | [P4_EVENT_SCALAR_DP_UOP] = { | ||
162 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), | ||
163 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
164 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
165 | }, | ||
166 | [P4_EVENT_64BIT_MMX_UOP] = { | ||
167 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), | ||
168 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
169 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
170 | }, | ||
171 | [P4_EVENT_128BIT_MMX_UOP] = { | ||
172 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), | ||
173 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
174 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
175 | }, | ||
176 | [P4_EVENT_X87_FP_UOP] = { | ||
177 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), | ||
178 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
179 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
180 | }, | ||
181 | [P4_EVENT_TC_MISC] = { | ||
182 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), | ||
183 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
184 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
185 | }, | ||
186 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { | ||
187 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), | ||
188 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
189 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
190 | }, | ||
191 | [P4_EVENT_TC_MS_XFER] = { | ||
192 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), | ||
193 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
194 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
195 | }, | ||
196 | [P4_EVENT_UOP_QUEUE_WRITES] = { | ||
197 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), | ||
198 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
199 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
200 | }, | ||
201 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { | ||
202 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), | ||
203 | .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR0 }, | ||
204 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
205 | }, | ||
206 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { | ||
207 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), | ||
208 | .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 }, | ||
209 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
210 | }, | ||
211 | [P4_EVENT_RESOURCE_STALL] = { | ||
212 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), | ||
213 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, | ||
214 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
215 | }, | ||
216 | [P4_EVENT_WC_BUFFER] = { | ||
217 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), | ||
218 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
219 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
220 | }, | ||
221 | [P4_EVENT_B2B_CYCLES] = { | ||
222 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), | ||
223 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
224 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
225 | }, | ||
226 | [P4_EVENT_BNR] = { | ||
227 | .opcode = P4_OPCODE(P4_EVENT_BNR), | ||
228 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
229 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
230 | }, | ||
231 | [P4_EVENT_SNOOP] = { | ||
232 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), | ||
233 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
234 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
235 | }, | ||
236 | [P4_EVENT_RESPONSE] = { | ||
237 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), | ||
238 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
239 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
240 | }, | ||
241 | [P4_EVENT_FRONT_END_EVENT] = { | ||
242 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), | ||
243 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
244 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
245 | }, | ||
246 | [P4_EVENT_EXECUTION_EVENT] = { | ||
247 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), | ||
248 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
249 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
250 | }, | ||
251 | [P4_EVENT_REPLAY_EVENT] = { | ||
252 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), | ||
253 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
254 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
255 | }, | ||
256 | [P4_EVENT_INSTR_RETIRED] = { | ||
257 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), | ||
258 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
259 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
260 | }, | ||
261 | [P4_EVENT_UOPS_RETIRED] = { | ||
262 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), | ||
263 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
264 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
265 | }, | ||
266 | [P4_EVENT_UOP_TYPE] = { | ||
267 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), | ||
268 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | ||
269 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
270 | }, | ||
271 | [P4_EVENT_BRANCH_RETIRED] = { | ||
272 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), | ||
273 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
274 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
275 | }, | ||
276 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { | ||
277 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), | ||
278 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
279 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
280 | }, | ||
281 | [P4_EVENT_X87_ASSIST] = { | ||
282 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), | ||
283 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
284 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
285 | }, | ||
286 | [P4_EVENT_MACHINE_CLEAR] = { | ||
287 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), | ||
288 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
289 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
290 | }, | ||
291 | [P4_EVENT_INSTR_COMPLETED] = { | ||
292 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), | ||
293 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
294 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
295 | }, | ||
296 | }; | ||
297 | |||
298 | #define P4_GEN_CACHE_EVENT(event, bit, metric) \ | ||
299 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ | ||
300 | P4_ESCR_EMASK_BIT(event, bit)) | \ | ||
301 | p4_config_pack_cccr(metric | \ | ||
302 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) | ||
303 | |||
304 | static __initconst const u64 p4_hw_cache_event_ids | ||
305 | [PERF_COUNT_HW_CACHE_MAX] | ||
306 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
307 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
308 | { | ||
309 | [ C(L1D ) ] = { | ||
310 | [ C(OP_READ) ] = { | ||
311 | [ C(RESULT_ACCESS) ] = 0x0, | ||
312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
313 | P4_PEBS_METRIC__1stl_cache_load_miss_retired), | ||
314 | }, | ||
315 | }, | ||
316 | [ C(LL ) ] = { | ||
317 | [ C(OP_READ) ] = { | ||
318 | [ C(RESULT_ACCESS) ] = 0x0, | ||
319 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
320 | P4_PEBS_METRIC__2ndl_cache_load_miss_retired), | ||
321 | }, | ||
322 | }, | ||
323 | [ C(DTLB) ] = { | ||
324 | [ C(OP_READ) ] = { | ||
325 | [ C(RESULT_ACCESS) ] = 0x0, | ||
326 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
327 | P4_PEBS_METRIC__dtlb_load_miss_retired), | ||
328 | }, | ||
329 | [ C(OP_WRITE) ] = { | ||
330 | [ C(RESULT_ACCESS) ] = 0x0, | ||
331 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
332 | P4_PEBS_METRIC__dtlb_store_miss_retired), | ||
333 | }, | ||
334 | }, | ||
335 | [ C(ITLB) ] = { | ||
336 | [ C(OP_READ) ] = { | ||
337 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, | ||
338 | P4_PEBS_METRIC__none), | ||
339 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, | ||
340 | P4_PEBS_METRIC__none), | ||
341 | }, | ||
342 | [ C(OP_WRITE) ] = { | ||
343 | [ C(RESULT_ACCESS) ] = -1, | ||
344 | [ C(RESULT_MISS) ] = -1, | ||
345 | }, | ||
346 | [ C(OP_PREFETCH) ] = { | ||
347 | [ C(RESULT_ACCESS) ] = -1, | ||
348 | [ C(RESULT_MISS) ] = -1, | ||
349 | }, | ||
350 | }, | ||
351 | }; | ||
352 | |||
353 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { | ||
354 | /* non-halted CPU clocks */ | ||
355 | [PERF_COUNT_HW_CPU_CYCLES] = | ||
356 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | | ||
357 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), | ||
358 | |||
359 | /* | ||
360 | * retired instructions | ||
361 | * for the sake of simplicity we don't use the FSB tagging | ||
362 | */ | ||
363 | [PERF_COUNT_HW_INSTRUCTIONS] = | ||
364 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) | | ||
365 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | | ||
366 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)), | ||
367 | |||
368 | /* cache hits */ | ||
369 | [PERF_COUNT_HW_CACHE_REFERENCES] = | ||
370 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
371 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | ||
372 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | ||
373 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | ||
374 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | ||
375 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | ||
376 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)), | ||
377 | |||
378 | /* cache misses */ | ||
379 | [PERF_COUNT_HW_CACHE_MISSES] = | ||
380 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
381 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | ||
382 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | ||
383 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)), | ||
384 | |||
385 | /* branch instructions retired */ | ||
386 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = | ||
387 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) | | ||
388 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | | ||
389 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | | ||
390 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | | ||
391 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)), | ||
392 | |||
393 | /* mispredicted branches retired */ | ||
394 | [PERF_COUNT_HW_BRANCH_MISSES] = | ||
395 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) | | ||
396 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)), | ||
397 | |||
398 | /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */ | ||
399 | [PERF_COUNT_HW_BUS_CYCLES] = | ||
400 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) | | ||
401 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | | ||
402 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) | | ||
403 | p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), | ||
404 | }; | ||
405 | |||
406 | static struct p4_event_bind *p4_config_get_bind(u64 config) | ||
407 | { | ||
408 | unsigned int evnt = p4_config_unpack_event(config); | ||
409 | struct p4_event_bind *bind = NULL; | ||
410 | |||
411 | if (evnt < ARRAY_SIZE(p4_event_bind_map)) | ||
412 | bind = &p4_event_bind_map[evnt]; | ||
413 | |||
414 | return bind; | ||
415 | } | ||
416 | |||
417 | static u64 p4_pmu_event_map(int hw_event) | ||
418 | { | ||
419 | struct p4_event_bind *bind; | ||
420 | unsigned int esel; | ||
421 | u64 config; | ||
422 | |||
423 | config = p4_general_events[hw_event]; | ||
424 | bind = p4_config_get_bind(config); | ||
425 | esel = P4_OPCODE_ESEL(bind->opcode); | ||
426 | config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); | ||
427 | |||
428 | return config; | ||
429 | } | ||
430 | |||
431 | static int p4_validate_raw_event(struct perf_event *event) | ||
432 | { | ||
433 | unsigned int v; | ||
434 | |||
435 | /* user data may have an out-of-bounds event index */ | ||
436 | v = p4_config_unpack_event(event->attr.config); | ||
437 | if (v >= ARRAY_SIZE(p4_event_bind_map)) { | ||
438 | pr_warning("P4 PMU: Unknown event code: %d\n", v); | ||
439 | return -EINVAL; | ||
440 | } | ||
441 | |||
442 | /* | ||
443 | * it may have some screwed PEBS bits | ||
444 | */ | ||
445 | if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { | ||
446 | pr_warning("P4 PMU: PEBS are not supported yet\n"); | ||
447 | return -EINVAL; | ||
448 | } | ||
449 | v = p4_config_unpack_metric(event->attr.config); | ||
450 | if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { | ||
451 | pr_warning("P4 PMU: Unknown metric code: %d\n", v); | ||
452 | return -EINVAL; | ||
453 | } | ||
454 | |||
455 | return 0; | ||
456 | } | ||
457 | |||
458 | static int p4_hw_config(struct perf_event *event) | ||
459 | { | ||
460 | int cpu = get_cpu(); | ||
461 | int rc = 0; | ||
462 | u32 escr, cccr; | ||
463 | |||
464 | /* | ||
465 | * the reason we use the cpu this early is that if we get scheduled | ||
466 | * the first time on the same cpu -- we will not need to swap thread- | ||
467 | * specific flags in the config (and will save some cpu cycles) | ||
468 | */ | ||
469 | |||
470 | cccr = p4_default_cccr_conf(cpu); | ||
471 | escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel, | ||
472 | event->attr.exclude_user); | ||
473 | event->hw.config = p4_config_pack_escr(escr) | | ||
474 | p4_config_pack_cccr(cccr); | ||
475 | |||
476 | if (p4_ht_active() && p4_ht_thread(cpu)) | ||
477 | event->hw.config = p4_set_ht_bit(event->hw.config); | ||
478 | |||
479 | if (event->attr.type == PERF_TYPE_RAW) { | ||
480 | |||
481 | rc = p4_validate_raw_event(event); | ||
482 | if (rc) | ||
483 | goto out; | ||
484 | |||
485 | /* | ||
486 | * We don't control raw events so it's up to the caller | ||
487 | * to pass sane values (and we don't count the thread number | ||
488 | * on an HT machine but allow HT-compatible specifics to be | ||
489 | * passed on) | ||
490 | * | ||
491 | * Note that for RAW events we allow the user to use P4_CCCR_RESERVED | ||
492 | * bits since we keep additional info here (for cache events etc.) | ||
493 | * | ||
494 | * XXX: HT wide things should check perf_paranoid_cpu() && | ||
495 | * CAP_SYS_ADMIN | ||
496 | */ | ||
497 | event->hw.config |= event->attr.config & | ||
498 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | ||
499 | p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); | ||
500 | |||
501 | event->hw.config &= ~P4_CCCR_FORCE_OVF; | ||
502 | } | ||
503 | |||
504 | rc = x86_setup_perfctr(event); | ||
505 | out: | ||
506 | put_cpu(); | ||
507 | return rc; | ||
508 | } | ||
509 | |||
510 | static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | ||
511 | { | ||
512 | int overflow = 0; | ||
513 | u32 low, high; | ||
514 | |||
515 | rdmsr(hwc->config_base + hwc->idx, low, high); | ||
516 | |||
517 | /* we need to check high bit for unflagged overflows */ | ||
518 | if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { | ||
519 | overflow = 1; | ||
520 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
521 | ((u64)low) & ~P4_CCCR_OVF); | ||
522 | } | ||
523 | |||
524 | return overflow; | ||
525 | } | ||
526 | |||
527 | static void p4_pmu_disable_pebs(void) | ||
528 | { | ||
529 | /* | ||
530 | * FIXME | ||
531 | * | ||
532 | * It's still allowed that two threads set up the same cache | ||
533 | * events, so we can't simply clear the metrics until we know | ||
534 | * no one is depending on us; we need some kind of counter | ||
535 | * for "ReplayEvent" users. | ||
536 | * | ||
537 | * What is more complex -- RAW events: if the user (for some | ||
538 | * reason) passes a cache event metric with an improper | ||
539 | * event opcode, it's fine from the hardware point of view | ||
540 | * but complete nonsense as far as the "meaning" of such an action goes. | ||
541 | * | ||
542 | * So for the moment let's leave the metrics turned on forever -- it's | ||
543 | * ok for now but needs to be revisited! | ||
544 | * | ||
545 | * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); | ||
546 | * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); | ||
547 | */ | ||
548 | } | ||
549 | |||
550 | static inline void p4_pmu_disable_event(struct perf_event *event) | ||
551 | { | ||
552 | struct hw_perf_event *hwc = &event->hw; | ||
553 | |||
554 | /* | ||
555 | * If the event gets disabled while the counter is in the overflowed | ||
556 | * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets | ||
557 | * asserted again and again | ||
558 | */ | ||
559 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
560 | (u64)(p4_config_unpack_cccr(hwc->config)) & | ||
561 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); | ||
562 | } | ||
563 | |||
564 | static void p4_pmu_disable_all(void) | ||
565 | { | ||
566 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
567 | int idx; | ||
568 | |||
569 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
570 | struct perf_event *event = cpuc->events[idx]; | ||
571 | if (!test_bit(idx, cpuc->active_mask)) | ||
572 | continue; | ||
573 | p4_pmu_disable_event(event); | ||
574 | } | ||
575 | |||
576 | p4_pmu_disable_pebs(); | ||
577 | } | ||
578 | |||
579 | /* configuration must be valid */ | ||
580 | static void p4_pmu_enable_pebs(u64 config) | ||
581 | { | ||
582 | struct p4_pebs_bind *bind; | ||
583 | unsigned int idx; | ||
584 | |||
585 | BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK); | ||
586 | |||
587 | idx = p4_config_unpack_metric(config); | ||
588 | if (idx == P4_PEBS_METRIC__none) | ||
589 | return; | ||
590 | |||
591 | bind = &p4_pebs_bind_map[idx]; | ||
592 | |||
593 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); | ||
594 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); | ||
595 | } | ||
596 | |||
597 | static void p4_pmu_enable_event(struct perf_event *event) | ||
598 | { | ||
599 | struct hw_perf_event *hwc = &event->hw; | ||
600 | int thread = p4_ht_config_thread(hwc->config); | ||
601 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | ||
602 | unsigned int idx = p4_config_unpack_event(hwc->config); | ||
603 | struct p4_event_bind *bind; | ||
604 | u64 escr_addr, cccr; | ||
605 | |||
606 | bind = &p4_event_bind_map[idx]; | ||
607 | escr_addr = (u64)bind->escr_msr[thread]; | ||
608 | |||
609 | /* | ||
610 | * - we don't support cascaded counters yet | ||
611 | * - and counter 1 is broken (erratum) | ||
612 | */ | ||
613 | WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); | ||
614 | WARN_ON_ONCE(hwc->idx == 1); | ||
615 | |||
616 | /* we need a real Event value */ | ||
617 | escr_conf &= ~P4_ESCR_EVENT_MASK; | ||
618 | escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode)); | ||
619 | |||
620 | cccr = p4_config_unpack_cccr(hwc->config); | ||
621 | |||
622 | /* | ||
623 | * it could be a cache event so we need to write metrics | ||
624 | * into additional MSRs | ||
625 | */ | ||
626 | p4_pmu_enable_pebs(hwc->config); | ||
627 | |||
628 | (void)checking_wrmsrl(escr_addr, escr_conf); | ||
629 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
630 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); | ||
631 | } | ||
632 | |||
633 | static void p4_pmu_enable_all(int added) | ||
634 | { | ||
635 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
636 | int idx; | ||
637 | |||
638 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
639 | struct perf_event *event = cpuc->events[idx]; | ||
640 | if (!test_bit(idx, cpuc->active_mask)) | ||
641 | continue; | ||
642 | p4_pmu_enable_event(event); | ||
643 | } | ||
644 | } | ||
645 | |||
646 | static int p4_pmu_handle_irq(struct pt_regs *regs) | ||
647 | { | ||
648 | struct perf_sample_data data; | ||
649 | struct cpu_hw_events *cpuc; | ||
650 | struct perf_event *event; | ||
651 | struct hw_perf_event *hwc; | ||
652 | int idx, handled = 0; | ||
653 | u64 val; | ||
654 | |||
655 | data.addr = 0; | ||
656 | data.raw = NULL; | ||
657 | |||
658 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
659 | |||
660 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
661 | int overflow; | ||
662 | |||
663 | if (!test_bit(idx, cpuc->active_mask)) { | ||
664 | /* catch in-flight IRQs */ | ||
665 | if (__test_and_clear_bit(idx, cpuc->running)) | ||
666 | handled++; | ||
667 | continue; | ||
668 | } | ||
669 | |||
670 | event = cpuc->events[idx]; | ||
671 | hwc = &event->hw; | ||
672 | |||
673 | WARN_ON_ONCE(hwc->idx != idx); | ||
674 | |||
675 | /* it might be an unflagged overflow */ | ||
676 | overflow = p4_pmu_clear_cccr_ovf(hwc); | ||
677 | |||
678 | val = x86_perf_event_update(event); | ||
679 | if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) | ||
680 | continue; | ||
681 | |||
682 | handled += overflow; | ||
683 | |||
684 | /* event overflow for sure */ | ||
685 | data.period = event->hw.last_period; | ||
686 | |||
687 | if (!x86_perf_event_set_period(event)) | ||
688 | continue; | ||
689 | if (perf_event_overflow(event, 1, &data, regs)) | ||
690 | p4_pmu_disable_event(event); | ||
691 | } | ||
692 | |||
693 | if (handled) { | ||
694 | /* p4 quirk: unmask it again */ | ||
695 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | ||
696 | inc_irq_stat(apic_perf_irqs); | ||
697 | } | ||
698 | |||
699 | return handled; | ||
700 | } | ||
701 | |||
702 | /* | ||
703 | * swap thread-specific fields according to the thread | ||
704 | * we are going to run on | ||
705 | */ | ||
706 | static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) | ||
707 | { | ||
708 | u32 escr, cccr; | ||
709 | |||
710 | /* | ||
711 | * either we are lucky and continue on the same cpu, or there is no HT support | ||
712 | */ | ||
713 | if (!p4_should_swap_ts(hwc->config, cpu)) | ||
714 | return; | ||
715 | |||
716 | /* | ||
717 | * the event has been migrated from another logical | ||
718 | * cpu, so we need to swap thread-specific flags | ||
719 | */ | ||
720 | |||
721 | escr = p4_config_unpack_escr(hwc->config); | ||
722 | cccr = p4_config_unpack_cccr(hwc->config); | ||
723 | |||
724 | if (p4_ht_thread(cpu)) { | ||
725 | cccr &= ~P4_CCCR_OVF_PMI_T0; | ||
726 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
727 | if (escr & P4_ESCR_T0_OS) { | ||
728 | escr &= ~P4_ESCR_T0_OS; | ||
729 | escr |= P4_ESCR_T1_OS; | ||
730 | } | ||
731 | if (escr & P4_ESCR_T0_USR) { | ||
732 | escr &= ~P4_ESCR_T0_USR; | ||
733 | escr |= P4_ESCR_T1_USR; | ||
734 | } | ||
735 | hwc->config = p4_config_pack_escr(escr); | ||
736 | hwc->config |= p4_config_pack_cccr(cccr); | ||
737 | hwc->config |= P4_CONFIG_HT; | ||
738 | } else { | ||
739 | cccr &= ~P4_CCCR_OVF_PMI_T1; | ||
740 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
741 | if (escr & P4_ESCR_T1_OS) { | ||
742 | escr &= ~P4_ESCR_T1_OS; | ||
743 | escr |= P4_ESCR_T0_OS; | ||
744 | } | ||
745 | if (escr & P4_ESCR_T1_USR) { | ||
746 | escr &= ~P4_ESCR_T1_USR; | ||
747 | escr |= P4_ESCR_T0_USR; | ||
748 | } | ||
749 | hwc->config = p4_config_pack_escr(escr); | ||
750 | hwc->config |= p4_config_pack_cccr(cccr); | ||
751 | hwc->config &= ~P4_CONFIG_HT; | ||
752 | } | ||
753 | } | ||
754 | |||
755 | /* | ||
756 | * ESCR address hashing is tricky: ESCRs are not sequential | ||
757 | * in memory, but they all start from MSR_P4_BSU_ESCR0 (0x03a0) and | ||
758 | * the low byte of any ESCR address lies in the range [0xa0, 0xe1], | ||
759 | * | ||
760 | * so we end up with a ~70% filled hashtable | ||
761 | */ | ||
762 | |||
763 | #define P4_ESCR_MSR_BASE 0x000003a0 | ||
764 | #define P4_ESCR_MSR_MAX 0x000003e1 | ||
765 | #define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1) | ||
766 | #define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE) | ||
767 | #define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr | ||
768 | |||
769 | static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = { | ||
770 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0), | ||
771 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1), | ||
772 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0), | ||
773 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1), | ||
774 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0), | ||
775 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1), | ||
776 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0), | ||
777 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1), | ||
778 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2), | ||
779 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3), | ||
780 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4), | ||
781 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5), | ||
782 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0), | ||
783 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1), | ||
784 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0), | ||
785 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1), | ||
786 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0), | ||
787 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1), | ||
788 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0), | ||
789 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1), | ||
790 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0), | ||
791 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1), | ||
792 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0), | ||
793 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1), | ||
794 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0), | ||
795 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1), | ||
796 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0), | ||
797 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1), | ||
798 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0), | ||
799 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1), | ||
800 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0), | ||
801 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1), | ||
802 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0), | ||
803 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1), | ||
804 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0), | ||
805 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1), | ||
806 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0), | ||
807 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1), | ||
808 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0), | ||
809 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1), | ||
810 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0), | ||
811 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1), | ||
812 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0), | ||
813 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1), | ||
814 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0), | ||
815 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1), | ||
816 | }; | ||
817 | |||
818 | static int p4_get_escr_idx(unsigned int addr) | ||
819 | { | ||
820 | unsigned int idx = P4_ESCR_MSR_IDX(addr); | ||
821 | |||
822 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || | ||
823 | !p4_escr_table[idx] || | ||
824 | p4_escr_table[idx] != addr)) { | ||
825 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); | ||
826 | return -1; | ||
827 | } | ||
828 | |||
829 | return idx; | ||
830 | } | ||
831 | |||
832 | static int p4_next_cntr(int thread, unsigned long *used_mask, | ||
833 | struct p4_event_bind *bind) | ||
834 | { | ||
835 | int i, j; | ||
836 | |||
837 | for (i = 0; i < P4_CNTR_LIMIT; i++) { | ||
838 | j = bind->cntr[thread][i]; | ||
839 | if (j != -1 && !test_bit(j, used_mask)) | ||
840 | return j; | ||
841 | } | ||
842 | |||
843 | return -1; | ||
844 | } | ||
845 | |||
846 | static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | ||
847 | { | ||
848 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
849 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; | ||
850 | int cpu = smp_processor_id(); | ||
851 | struct hw_perf_event *hwc; | ||
852 | struct p4_event_bind *bind; | ||
853 | unsigned int i, thread, num; | ||
854 | int cntr_idx, escr_idx; | ||
855 | |||
856 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
857 | bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); | ||
858 | |||
859 | for (i = 0, num = n; i < n; i++, num--) { | ||
860 | |||
861 | hwc = &cpuc->event_list[i]->hw; | ||
862 | thread = p4_ht_thread(cpu); | ||
863 | bind = p4_config_get_bind(hwc->config); | ||
864 | escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); | ||
865 | if (unlikely(escr_idx == -1)) | ||
866 | goto done; | ||
867 | |||
868 | if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { | ||
869 | cntr_idx = hwc->idx; | ||
870 | if (assign) | ||
871 | assign[i] = hwc->idx; | ||
872 | goto reserve; | ||
873 | } | ||
874 | |||
875 | cntr_idx = p4_next_cntr(thread, used_mask, bind); | ||
876 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) | ||
877 | goto done; | ||
878 | |||
879 | p4_pmu_swap_config_ts(hwc, cpu); | ||
880 | if (assign) | ||
881 | assign[i] = cntr_idx; | ||
882 | reserve: | ||
883 | set_bit(cntr_idx, used_mask); | ||
884 | set_bit(escr_idx, escr_mask); | ||
885 | } | ||
886 | |||
887 | done: | ||
888 | return num ? -ENOSPC : 0; | ||
889 | } | ||
890 | |||
891 | static __initconst const struct x86_pmu p4_pmu = { | ||
892 | .name = "Netburst P4/Xeon", | ||
893 | .handle_irq = p4_pmu_handle_irq, | ||
894 | .disable_all = p4_pmu_disable_all, | ||
895 | .enable_all = p4_pmu_enable_all, | ||
896 | .enable = p4_pmu_enable_event, | ||
897 | .disable = p4_pmu_disable_event, | ||
898 | .eventsel = MSR_P4_BPU_CCCR0, | ||
899 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
900 | .event_map = p4_pmu_event_map, | ||
901 | .max_events = ARRAY_SIZE(p4_general_events), | ||
902 | .get_event_constraints = x86_get_event_constraints, | ||
903 | /* | ||
904 | * If HT is disabled we may need to use all | ||
905 | * ARCH_P4_MAX_CCCR counters simultaneously, | ||
906 | * though we leave it restricted for the moment assuming | ||
907 | * HT is on | ||
908 | */ | ||
909 | .num_counters = ARCH_P4_MAX_CCCR, | ||
910 | .apic = 1, | ||
911 | .cntval_bits = 40, | ||
912 | .cntval_mask = (1ULL << 40) - 1, | ||
913 | .max_period = (1ULL << 39) - 1, | ||
914 | .hw_config = p4_hw_config, | ||
915 | .schedule_events = p4_pmu_schedule_events, | ||
916 | /* | ||
917 | * This handles erratum N15 in Intel doc 249199-029: | ||
918 | * the counter may not be updated correctly on write, | ||
919 | * so we need a second write operation to do the trick | ||
920 | * (the official workaround didn't work) | ||
921 | * | ||
922 | * the idea is taken from the OProfile code | ||
923 | */ | ||
924 | .perfctr_second_write = 1, | ||
925 | }; | ||
926 | |||
927 | static __init int p4_pmu_init(void) | ||
928 | { | ||
929 | unsigned int low, high; | ||
930 | |||
931 | /* If we get stripped -- indexing fails */ | ||
932 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | ||
933 | |||
934 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | ||
935 | if (!(low & (1 << 7))) { | ||
936 | pr_cont("unsupported Netburst CPU model %d ", | ||
937 | boot_cpu_data.x86_model); | ||
938 | return -ENODEV; | ||
939 | } | ||
940 | |||
941 | memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, | ||
942 | sizeof(hw_cache_event_ids)); | ||
943 | |||
944 | pr_cont("Netburst events, "); | ||
945 | |||
946 | x86_pmu = p4_pmu; | ||
947 | |||
948 | return 0; | ||
949 | } | ||
950 | |||
951 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
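Editorial note (not part of the patch): the ESCR "hashing" in p4_get_escr_idx() above is a direct-mapped table keyed by (msr - P4_ESCR_MSR_BASE), with unused slots left zero so a hit must match the stored address exactly. A small user-space model of the same pattern (the two sample entries are illustrative only):

#include <stdio.h>

#define ESCR_BASE 0x3a0
#define ESCR_SIZE (0x3e1 - 0x3a0 + 1)

/* toy table with just two known addresses; unused slots stay zero */
static const unsigned int escr_table[ESCR_SIZE] = {
	[0x3a0 - ESCR_BASE] = 0x3a0,
	[0x3a2 - ESCR_BASE] = 0x3a2,
};

static int escr_idx(unsigned int msr)
{
	unsigned int idx = msr - ESCR_BASE;

	if (idx >= ESCR_SIZE || escr_table[idx] != msr)
		return -1;			/* not a known ESCR address */
	return idx;
}

int main(void)
{
	printf("%d %d\n", escr_idx(0x3a2), escr_idx(0x3b3));	/* prints: 2 -1 */
	return 0;
}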
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c new file mode 100644 index 000000000000..34ba07be2cda --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -0,0 +1,142 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* | ||
4 | * Not sure about some of these | ||
5 | */ | ||
6 | static const u64 p6_perfmon_event_map[] = | ||
7 | { | ||
8 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
9 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
10 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
11 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
12 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
13 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
14 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
15 | }; | ||
16 | |||
17 | static u64 p6_pmu_event_map(int hw_event) | ||
18 | { | ||
19 | return p6_perfmon_event_map[hw_event]; | ||
20 | } | ||
21 | |||
22 | /* | ||
23 | * Event setting that is specified not to count anything. | ||
24 | * We use this to effectively disable a counter. | ||
25 | * | ||
26 | * L2_RQSTS with 0 MESI unit mask. | ||
27 | */ | ||
28 | #define P6_NOP_EVENT 0x0000002EULL | ||
29 | |||
30 | static struct event_constraint p6_event_constraints[] = | ||
31 | { | ||
32 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | ||
33 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
34 | INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | ||
35 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
36 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
37 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
38 | EVENT_CONSTRAINT_END | ||
39 | }; | ||
40 | |||
41 | static void p6_pmu_disable_all(void) | ||
42 | { | ||
43 | u64 val; | ||
44 | |||
45 | /* p6 only has one enable register */ | ||
46 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
47 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; | ||
48 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
49 | } | ||
50 | |||
51 | static void p6_pmu_enable_all(int added) | ||
52 | { | ||
53 | unsigned long val; | ||
54 | |||
55 | /* p6 only has one enable register */ | ||
56 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
57 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
58 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
59 | } | ||
60 | |||
61 | static inline void | ||
62 | p6_pmu_disable_event(struct perf_event *event) | ||
63 | { | ||
64 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
65 | struct hw_perf_event *hwc = &event->hw; | ||
66 | u64 val = P6_NOP_EVENT; | ||
67 | |||
68 | if (cpuc->enabled) | ||
69 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
70 | |||
71 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | ||
72 | } | ||
73 | |||
74 | static void p6_pmu_enable_event(struct perf_event *event) | ||
75 | { | ||
76 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
77 | struct hw_perf_event *hwc = &event->hw; | ||
78 | u64 val; | ||
79 | |||
80 | val = hwc->config; | ||
81 | if (cpuc->enabled) | ||
82 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
83 | |||
84 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | ||
85 | } | ||
86 | |||
87 | static __initconst const struct x86_pmu p6_pmu = { | ||
88 | .name = "p6", | ||
89 | .handle_irq = x86_pmu_handle_irq, | ||
90 | .disable_all = p6_pmu_disable_all, | ||
91 | .enable_all = p6_pmu_enable_all, | ||
92 | .enable = p6_pmu_enable_event, | ||
93 | .disable = p6_pmu_disable_event, | ||
94 | .hw_config = x86_pmu_hw_config, | ||
95 | .schedule_events = x86_schedule_events, | ||
96 | .eventsel = MSR_P6_EVNTSEL0, | ||
97 | .perfctr = MSR_P6_PERFCTR0, | ||
98 | .event_map = p6_pmu_event_map, | ||
99 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
100 | .apic = 1, | ||
101 | .max_period = (1ULL << 31) - 1, | ||
102 | .version = 0, | ||
103 | .num_counters = 2, | ||
104 | /* | ||
105 | * Events have 40 bits implemented. However they are designed such | ||
106 | * that bits [32-39] are sign extensions of bit 31. As such the | ||
107 | * effective width of an event for a P6-like PMU is 32 bits only. | ||
108 | * | ||
109 | * See IA-32 Intel Architecture Software developer manual Vol 3B | ||
110 | */ | ||
111 | .cntval_bits = 32, | ||
112 | .cntval_mask = (1ULL << 32) - 1, | ||
113 | .get_event_constraints = x86_get_event_constraints, | ||
114 | .event_constraints = p6_event_constraints, | ||
115 | }; | ||
116 | |||
117 | static __init int p6_pmu_init(void) | ||
118 | { | ||
119 | switch (boot_cpu_data.x86_model) { | ||
120 | case 1: | ||
121 | case 3: /* Pentium Pro */ | ||
122 | case 5: | ||
123 | case 6: /* Pentium II */ | ||
124 | case 7: | ||
125 | case 8: | ||
126 | case 11: /* Pentium III */ | ||
127 | case 9: | ||
128 | case 13: | ||
129 | /* Pentium M */ | ||
130 | break; | ||
131 | default: | ||
132 | pr_cont("unsupported p6 CPU model %d ", | ||
133 | boot_cpu_data.x86_model); | ||
134 | return -ENODEV; | ||
135 | } | ||
136 | |||
137 | x86_pmu = p6_pmu; | ||
138 | |||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
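Editorial note (not part of the patch): because bits [32-39] of a P6 counter mirror bit 31 (see the cntval_bits comment above), a raw counter value behaves like a 32-bit signed quantity, which is why max_period is 2^31 - 1. A one-line sketch of widening such a value to 64 bits:

/* Keep the low 32 bits and sign-extend from bit 31. */
static inline s64 p6_counter_to_s64(u64 raw)
{
	return (s64)(s32)(raw & 0xffffffffULL);
}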
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 898df9719afb..fb329e9f8494 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | |||
115 | 115 | ||
116 | return !test_bit(counter, perfctr_nmi_owner); | 116 | return !test_bit(counter, perfctr_nmi_owner); |
117 | } | 117 | } |
118 | |||
119 | /* checks the an msr for availability */ | ||
120 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
121 | { | ||
122 | unsigned int counter; | ||
123 | |||
124 | counter = nmi_perfctr_msr_to_bit(msr); | ||
125 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
126 | |||
127 | return !test_bit(counter, perfctr_nmi_owner); | ||
128 | } | ||
129 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); | 118 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); |
130 | 119 | ||
131 | int reserve_perfctr_nmi(unsigned int msr) | 120 | int reserve_perfctr_nmi(unsigned int msr) |
@@ -691,7 +680,7 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) | |||
691 | cpu_nmi_set_wd_enabled(); | 680 | cpu_nmi_set_wd_enabled(); |
692 | 681 | ||
693 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 682 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
694 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 683 | evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE; |
695 | wrmsr(evntsel_msr, evntsel, 0); | 684 | wrmsr(evntsel_msr, evntsel, 0); |
696 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); | 685 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); |
697 | return 1; | 686 | return 1; |
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c new file mode 100644 index 000000000000..d49079515122 --- /dev/null +++ b/arch/x86/kernel/cpu/scattered.c | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * Routines to identify additional cpu features that are scattered in | ||
3 | * cpuid space. | ||
4 | */ | ||
5 | #include <linux/cpu.h> | ||
6 | |||
7 | #include <asm/pat.h> | ||
8 | #include <asm/processor.h> | ||
9 | |||
10 | #include <asm/apic.h> | ||
11 | |||
12 | struct cpuid_bit { | ||
13 | u16 feature; | ||
14 | u8 reg; | ||
15 | u8 bit; | ||
16 | u32 level; | ||
17 | u32 sub_leaf; | ||
18 | }; | ||
19 | |||
20 | enum cpuid_regs { | ||
21 | CR_EAX = 0, | ||
22 | CR_ECX, | ||
23 | CR_EDX, | ||
24 | CR_EBX | ||
25 | }; | ||
26 | |||
27 | void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | ||
28 | { | ||
29 | u32 max_level; | ||
30 | u32 regs[4]; | ||
31 | const struct cpuid_bit *cb; | ||
32 | |||
33 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | ||
34 | { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, | ||
35 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, | ||
36 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, | ||
37 | { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, | ||
38 | { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, | ||
39 | { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, | ||
40 | { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, | ||
41 | { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, | ||
42 | { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, | ||
43 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, | ||
44 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, | ||
45 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, | ||
46 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, | ||
47 | { 0, 0, 0, 0, 0 } | ||
48 | }; | ||
49 | |||
50 | for (cb = cpuid_bits; cb->feature; cb++) { | ||
51 | |||
52 | /* Verify that the level is valid */ | ||
53 | max_level = cpuid_eax(cb->level & 0xffff0000); | ||
54 | if (max_level < cb->level || | ||
55 | max_level > (cb->level | 0xffff)) | ||
56 | continue; | ||
57 | |||
58 | cpuid_count(cb->level, cb->sub_leaf, ®s[CR_EAX], | ||
59 | ®s[CR_EBX], ®s[CR_ECX], ®s[CR_EDX]); | ||
60 | |||
61 | if (regs[cb->reg] & (1 << cb->bit)) | ||
62 | set_cpu_cap(c, cb->feature); | ||
63 | } | ||
64 | } | ||
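Editorial note (not part of the patch): the same kind of scattered-bit probe that init_scattered_cpuid_features() performs can be reproduced from user space with the compiler's <cpuid.h> helpers. The sketch below checks X86_FEATURE_ARAT (CPUID.06H:EAX[2]) and X86_FEATURE_APERFMPERF (CPUID.06H:ECX[0]) from the table above:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x00000006, &eax, &ebx, &ecx, &edx))
		return 1;			/* leaf 0x6 not supported */

	printf("ARAT:       %u\n", (eax >> 2) & 1);
	printf("APERFMPERF: %u\n", ecx & 1);
	return 0;
}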
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/topology.c index 468489b57aae..4397e987a1cf 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/topology.c | |||
@@ -1,56 +1,14 @@ | |||
1 | /* | 1 | /* |
2 | * Routines to indentify additional cpu features that are scattered in | 2 | * Check for extended topology enumeration cpuid leaf 0xb and if it |
3 | * cpuid space. | 3 | * exists, use it for populating initial_apicid and cpu topology |
4 | * detection. | ||
4 | */ | 5 | */ |
5 | #include <linux/cpu.h> | ||
6 | 6 | ||
7 | #include <linux/cpu.h> | ||
8 | #include <asm/apic.h> | ||
7 | #include <asm/pat.h> | 9 | #include <asm/pat.h> |
8 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
9 | 11 | ||
10 | #include <asm/apic.h> | ||
11 | |||
12 | struct cpuid_bit { | ||
13 | u16 feature; | ||
14 | u8 reg; | ||
15 | u8 bit; | ||
16 | u32 level; | ||
17 | }; | ||
18 | |||
19 | enum cpuid_regs { | ||
20 | CR_EAX = 0, | ||
21 | CR_ECX, | ||
22 | CR_EDX, | ||
23 | CR_EBX | ||
24 | }; | ||
25 | |||
26 | void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | ||
27 | { | ||
28 | u32 max_level; | ||
29 | u32 regs[4]; | ||
30 | const struct cpuid_bit *cb; | ||
31 | |||
32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | ||
33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, | ||
34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, | ||
35 | { 0, 0, 0, 0 } | ||
36 | }; | ||
37 | |||
38 | for (cb = cpuid_bits; cb->feature; cb++) { | ||
39 | |||
40 | /* Verify that the level is valid */ | ||
41 | max_level = cpuid_eax(cb->level & 0xffff0000); | ||
42 | if (max_level < cb->level || | ||
43 | max_level > (cb->level | 0xffff)) | ||
44 | continue; | ||
45 | |||
46 | cpuid(cb->level, ®s[CR_EAX], ®s[CR_EBX], | ||
47 | ®s[CR_ECX], ®s[CR_EDX]); | ||
48 | |||
49 | if (regs[cb->reg] & (1 << cb->bit)) | ||
50 | set_cpu_cap(c, cb->feature); | ||
51 | } | ||
52 | } | ||
53 | |||
54 | /* leaf 0xb SMT level */ | 12 | /* leaf 0xb SMT level */ |
55 | #define SMT_LEVEL 0 | 13 | #define SMT_LEVEL 0 |
56 | 14 | ||
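Editorial note (not part of the patch): topology.c (the renamed addon_cpuid_features.c) walks CPUID leaf 0xb level by level, starting at the SMT level defined above. A hedged user-space sketch of that enumeration follows; the field positions (x2APIC shift in EAX[4:0], logical processor count in EBX[15:0], level type in ECX[15:8]) are assumed from the SDM rather than shown in this hunk:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx, level;

	for (level = 0; ; level++) {
		__cpuid_count(0x0000000b, level, eax, ebx, ecx, edx);
		if (!(ebx & 0xffff))
			break;			/* no more topology levels */
		printf("level %u: type %u, x2apic shift %u, logical cpus %u\n",
		       level, (ecx >> 8) & 0xff, eax & 0x1f, ebx & 0xffff);
	}
	return 0;
}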
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 1cbed97b59cf..227b0448960d 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -22,9 +22,10 @@ | |||
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
25 | #include <linux/module.h> | ||
25 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
26 | #include <asm/vmware.h> | ||
27 | #include <asm/x86_init.h> | 27 | #include <asm/x86_init.h> |
28 | #include <asm/hypervisor.h> | ||
28 | 29 | ||
29 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | 30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 |
30 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | 31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 |
@@ -50,7 +51,7 @@ static inline int __vmware_platform(void) | |||
50 | 51 | ||
51 | static unsigned long vmware_get_tsc_khz(void) | 52 | static unsigned long vmware_get_tsc_khz(void) |
52 | { | 53 | { |
53 | uint64_t tsc_hz; | 54 | uint64_t tsc_hz, lpj; |
54 | uint32_t eax, ebx, ecx, edx; | 55 | uint32_t eax, ebx, ecx, edx; |
55 | 56 | ||
56 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | 57 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); |
@@ -61,10 +62,17 @@ static unsigned long vmware_get_tsc_khz(void) | |||
61 | printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", | 62 | printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", |
62 | (unsigned long) tsc_hz / 1000, | 63 | (unsigned long) tsc_hz / 1000, |
63 | (unsigned long) tsc_hz % 1000); | 64 | (unsigned long) tsc_hz % 1000); |
65 | |||
66 | if (!preset_lpj) { | ||
67 | lpj = ((u64)tsc_hz * 1000); | ||
68 | do_div(lpj, HZ); | ||
69 | preset_lpj = lpj; | ||
70 | } | ||
71 | |||
64 | return tsc_hz; | 72 | return tsc_hz; |
65 | } | 73 | } |
66 | 74 | ||
67 | void __init vmware_platform_setup(void) | 75 | static void __init vmware_platform_setup(void) |
68 | { | 76 | { |
69 | uint32_t eax, ebx, ecx, edx; | 77 | uint32_t eax, ebx, ecx, edx; |
70 | 78 | ||
@@ -82,24 +90,21 @@ void __init vmware_platform_setup(void) | |||
82 | * serial key should be enough, as this will always have a VMware | 90 | * serial key should be enough, as this will always have a VMware |
83 | * specific string when running under VMware hypervisor. | 91 | * specific string when running under VMware hypervisor. |
84 | */ | 92 | */ |
85 | int vmware_platform(void) | 93 | static bool __init vmware_platform(void) |
86 | { | 94 | { |
87 | if (cpu_has_hypervisor) { | 95 | if (cpu_has_hypervisor) { |
88 | unsigned int eax, ebx, ecx, edx; | 96 | unsigned int eax; |
89 | char hyper_vendor_id[13]; | 97 | unsigned int hyper_vendor_id[3]; |
90 | 98 | ||
91 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx); | 99 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], |
92 | memcpy(hyper_vendor_id + 0, &ebx, 4); | 100 | &hyper_vendor_id[1], &hyper_vendor_id[2]); |
93 | memcpy(hyper_vendor_id + 4, &ecx, 4); | 101 | if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) |
94 | memcpy(hyper_vendor_id + 8, &edx, 4); | 102 | return true; |
95 | hyper_vendor_id[12] = '\0'; | ||
96 | if (!strcmp(hyper_vendor_id, "VMwareVMware")) | ||
97 | return 1; | ||
98 | } else if (dmi_available && dmi_name_in_serial("VMware") && | 103 | } else if (dmi_available && dmi_name_in_serial("VMware") && |
99 | __vmware_platform()) | 104 | __vmware_platform()) |
100 | return 1; | 105 | return true; |
101 | 106 | ||
102 | return 0; | 107 | return false; |
103 | } | 108 | } |
104 | 109 | ||
105 | /* | 110 | /* |
@@ -114,8 +119,16 @@ int vmware_platform(void) | |||
114 | * so that the kernel could just trust the hypervisor with providing a | 119 | * so that the kernel could just trust the hypervisor with providing a |
115 | * reliable virtual TSC that is suitable for timekeeping. | 120 | * reliable virtual TSC that is suitable for timekeeping. |
116 | */ | 121 | */ |
117 | void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c) | 122 | static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) |
118 | { | 123 | { |
119 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 124 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
120 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | 125 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); |
121 | } | 126 | } |
127 | |||
128 | const __refconst struct hypervisor_x86 x86_hyper_vmware = { | ||
129 | .name = "VMware", | ||
130 | .detect = vmware_platform, | ||
131 | .set_cpu_features = vmware_set_cpu_features, | ||
132 | .init_platform = vmware_platform_setup, | ||
133 | }; | ||
134 | EXPORT_SYMBOL(x86_hyper_vmware); | ||
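Editorial note (not part of the patch): the vendor check that vmware_platform() now performs with three 32-bit registers can be tried from user space as well. The sketch below reads the hypervisor signature at CPUID leaf 0x40000000; it is only meaningful when the hypervisor bit (CPUID.01H:ECX[31]) is set, which is what the cpu_has_hypervisor test above guards against:

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned int eax, sig[3];
	char vendor[13];

	__cpuid(0x40000000, eax, sig[0], sig[1], sig[2]);
	memcpy(vendor, sig, 12);
	vendor[12] = '\0';

	printf("max hypervisor leaf: 0x%x\n", eax);
	printf("hypervisor signature: \"%s\"\n", vendor);
	return strcmp(vendor, "VMwareVMware") ? 1 : 0;
}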