author | Andrea Bastoni <bastoni@cs.unc.edu> | 2010-10-23 01:01:49 -0400 |
---|---|---|
committer | Andrea Bastoni <bastoni@cs.unc.edu> | 2010-10-23 01:01:49 -0400 |
commit | 3dd41424090a0ca3a660218d06afe6ff4441bad3 (patch) | |
tree | 511ef1bb1799027fc5aad574adce49120ecadd87 /arch/x86/kernel/cpu | |
parent | 5c5456402d467969b217d7fdd6670f8c8600f5a8 (diff) | |
parent | f6f94e2ab1b33f0082ac22d71f66385a60d8157f (diff) |
Merge commit 'v2.6.36' into wip-merge-2.6.36
Conflicts:
Makefile
arch/x86/include/asm/unistd_32.h
arch/x86/kernel/syscall_table_32.S
kernel/sched.c
kernel/time/tick-sched.c
Relevant API and function changes (resolved in this commit; see the short sketch after this list):
- (API) .enqueue_task() (enqueue_task_litmus),
dequeue_task() (dequeue_task_litmus),
[litmus/sched_litmus.c]
- (API) .select_task_rq() (select_task_rq_litmus)
[litmus/sched_litmus.c]
- (API) sysrq_dump_trace_buffer() and sysrq_handle_kill_rt_tasks()
[litmus/sched_trace.c]
- struct kfifo internal buffer name changed (buffer -> buf)
[litmus/sched_trace.c]
- add_wait_queue_exclusive_locked -> __add_wait_queue_tail_exclusive
[litmus/fmlp.c]
- syscall numbers for both x86_32 and x86_64
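
To make the wait-queue item above concrete, here is a minimal sketch of what the rename looks like at a call site (the surrounding helper and its name are invented for illustration, not taken from litmus/fmlp.c; both the old and the new function take the same (queue, entry) arguments and expect the wait-queue spinlock to be held by the caller):

```c
#include <linux/wait.h>

/*
 * Pre-2.6.36 code (API removed upstream):
 *     add_wait_queue_exclusive_locked(&sem->wait, &wait);
 *
 * 2.6.36 equivalent with the same arguments; the caller still holds
 * sem->wait.lock.  The wrapper and its name are made up for this sketch.
 */
static void fmlp_add_waiter(wait_queue_head_t *wq, wait_queue_t *wait)
{
	__add_wait_queue_tail_exclusive(wq, wait);
}
```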
Diffstat (limited to 'arch/x86/kernel/cpu')
43 files changed, 3861 insertions, 1154 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c202b62f3671..3f0ebe429a01 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -12,11 +12,11 @@ endif | |||
12 | nostackp := $(call cc-option, -fno-stack-protector) | 12 | nostackp := $(call cc-option, -fno-stack-protector) |
13 | CFLAGS_common.o := $(nostackp) | 13 | CFLAGS_common.o := $(nostackp) |
14 | 14 | ||
15 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 15 | obj-y := intel_cacheinfo.o scattered.o topology.o |
16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
17 | obj-y += vmware.o hypervisor.o sched.o | 17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o |
18 | 18 | ||
19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 19 | obj-$(CONFIG_X86_32) += bugs.o |
20 | obj-$(CONFIG_X86_64) += bugs_64.o | 20 | obj-$(CONFIG_X86_64) += bugs_64.o |
21 | 21 | ||
22 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o | 22 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e485825130d2..ba5f62f45f01 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -466,7 +466,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
466 | } | 466 | } |
467 | 467 | ||
468 | } | 468 | } |
469 | if (c->x86 == 0x10 || c->x86 == 0x11) | 469 | if (c->x86 >= 0x10) |
470 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 470 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
471 | 471 | ||
472 | /* get apicid instead of initial apic id from cpuid */ | 472 | /* get apicid instead of initial apic id from cpuid */ |
@@ -529,7 +529,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
529 | num_cache_leaves = 3; | 529 | num_cache_leaves = 3; |
530 | } | 530 | } |
531 | 531 | ||
532 | if (c->x86 >= 0xf && c->x86 <= 0x11) | 532 | if (c->x86 >= 0xf) |
533 | set_cpu_cap(c, X86_FEATURE_K8); | 533 | set_cpu_cap(c, X86_FEATURE_K8); |
534 | 534 | ||
535 | if (cpu_has_xmm2) { | 535 | if (cpu_has_xmm2) { |
@@ -546,7 +546,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
546 | fam10h_check_enable_mmcfg(); | 546 | fam10h_check_enable_mmcfg(); |
547 | } | 547 | } |
548 | 548 | ||
549 | if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) { | 549 | if (c == &boot_cpu_data && c->x86 >= 0xf) { |
550 | unsigned long long tseg; | 550 | unsigned long long tseg; |
551 | 551 | ||
552 | /* | 552 | /* |
@@ -609,3 +609,74 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { | |||
609 | }; | 609 | }; |
610 | 610 | ||
611 | cpu_dev_register(amd_cpu_dev); | 611 | cpu_dev_register(amd_cpu_dev); |
612 | |||
613 | /* | ||
614 | * AMD errata checking | ||
615 | * | ||
616 | * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or | ||
617 | * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that | ||
618 | * have an OSVW id assigned, which it takes as first argument. Both take a | ||
619 | * variable number of family-specific model-stepping ranges created by | ||
620 | * AMD_MODEL_RANGE(). Each erratum also has to be declared as extern const | ||
621 | * int[] in arch/x86/include/asm/processor.h. | ||
622 | * | ||
623 | * Example: | ||
624 | * | ||
625 | * const int amd_erratum_319[] = | ||
626 | * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2), | ||
627 | * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0), | ||
628 | * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0)); | ||
629 | */ | ||
630 | |||
631 | const int amd_erratum_400[] = | ||
632 | AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf), | ||
633 | AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf)); | ||
634 | EXPORT_SYMBOL_GPL(amd_erratum_400); | ||
635 | |||
636 | const int amd_erratum_383[] = | ||
637 | AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); | ||
638 | EXPORT_SYMBOL_GPL(amd_erratum_383); | ||
639 | |||
640 | bool cpu_has_amd_erratum(const int *erratum) | ||
641 | { | ||
642 | struct cpuinfo_x86 *cpu = &current_cpu_data; | ||
643 | int osvw_id = *erratum++; | ||
644 | u32 range; | ||
645 | u32 ms; | ||
646 | |||
647 | /* | ||
648 | * If called early enough that current_cpu_data hasn't been initialized | ||
649 | * yet, fall back to boot_cpu_data. | ||
650 | */ | ||
651 | if (cpu->x86 == 0) | ||
652 | cpu = &boot_cpu_data; | ||
653 | |||
654 | if (cpu->x86_vendor != X86_VENDOR_AMD) | ||
655 | return false; | ||
656 | |||
657 | if (osvw_id >= 0 && osvw_id < 65536 && | ||
658 | cpu_has(cpu, X86_FEATURE_OSVW)) { | ||
659 | u64 osvw_len; | ||
660 | |||
661 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len); | ||
662 | if (osvw_id < osvw_len) { | ||
663 | u64 osvw_bits; | ||
664 | |||
665 | rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6), | ||
666 | osvw_bits); | ||
667 | return osvw_bits & (1ULL << (osvw_id & 0x3f)); | ||
668 | } | ||
669 | } | ||
670 | |||
671 | /* OSVW unavailable or ID unknown, match family-model-stepping range */ | ||
672 | ms = (cpu->x86_model << 4) | cpu->x86_mask; | ||
673 | while ((range = *erratum++)) | ||
674 | if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && | ||
675 | (ms >= AMD_MODEL_RANGE_START(range)) && | ||
676 | (ms <= AMD_MODEL_RANGE_END(range))) | ||
677 | return true; | ||
678 | |||
679 | return false; | ||
680 | } | ||
681 | |||
682 | EXPORT_SYMBOL_GPL(cpu_has_amd_erratum); | ||
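
The exported helper above is meant to serve as a simple predicate for workaround code elsewhere in the tree; a hedged sketch of such a caller (the wrapper name is invented here, only the erratum table and helper come from the hunk above):

```c
#include <linux/types.h>
#include <asm/processor.h>

/*
 * Illustrative caller only: report whether the running CPU is affected
 * by AMD erratum 400 (the C1E idle erratum), using amd_erratum_400 and
 * cpu_has_amd_erratum() as exported above.
 */
static bool needs_amd_e400_workaround(void)
{
	return cpu_has_amd_erratum(amd_erratum_400);
}
```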
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 01a265212395..c39576cb3018 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -86,7 +86,7 @@ static void __init check_fpu(void) | |||
86 | 86 | ||
87 | static void __init check_hlt(void) | 87 | static void __init check_hlt(void) |
88 | { | 88 | { |
89 | if (paravirt_enabled()) | 89 | if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) |
90 | return; | 90 | return; |
91 | 91 | ||
92 | printk(KERN_INFO "Checking 'hlt' instruction... "); | 92 | printk(KERN_INFO "Checking 'hlt' instruction... "); |
diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c deleted file mode 100644 index 2056ccf572cc..000000000000 --- a/arch/x86/kernel/cpu/cmpxchg.c +++ /dev/null | |||
@@ -1,72 +0,0 @@ | |||
1 | /* | ||
2 | * cmpxchg*() fallbacks for CPU not supporting these instructions | ||
3 | */ | ||
4 | |||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/smp.h> | ||
7 | #include <linux/module.h> | ||
8 | |||
9 | #ifndef CONFIG_X86_CMPXCHG | ||
10 | unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) | ||
11 | { | ||
12 | u8 prev; | ||
13 | unsigned long flags; | ||
14 | |||
15 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
16 | local_irq_save(flags); | ||
17 | prev = *(u8 *)ptr; | ||
18 | if (prev == old) | ||
19 | *(u8 *)ptr = new; | ||
20 | local_irq_restore(flags); | ||
21 | return prev; | ||
22 | } | ||
23 | EXPORT_SYMBOL(cmpxchg_386_u8); | ||
24 | |||
25 | unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) | ||
26 | { | ||
27 | u16 prev; | ||
28 | unsigned long flags; | ||
29 | |||
30 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
31 | local_irq_save(flags); | ||
32 | prev = *(u16 *)ptr; | ||
33 | if (prev == old) | ||
34 | *(u16 *)ptr = new; | ||
35 | local_irq_restore(flags); | ||
36 | return prev; | ||
37 | } | ||
38 | EXPORT_SYMBOL(cmpxchg_386_u16); | ||
39 | |||
40 | unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) | ||
41 | { | ||
42 | u32 prev; | ||
43 | unsigned long flags; | ||
44 | |||
45 | /* Poor man's cmpxchg for 386. Unsuitable for SMP */ | ||
46 | local_irq_save(flags); | ||
47 | prev = *(u32 *)ptr; | ||
48 | if (prev == old) | ||
49 | *(u32 *)ptr = new; | ||
50 | local_irq_restore(flags); | ||
51 | return prev; | ||
52 | } | ||
53 | EXPORT_SYMBOL(cmpxchg_386_u32); | ||
54 | #endif | ||
55 | |||
56 | #ifndef CONFIG_X86_CMPXCHG64 | ||
57 | unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new) | ||
58 | { | ||
59 | u64 prev; | ||
60 | unsigned long flags; | ||
61 | |||
62 | /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */ | ||
63 | local_irq_save(flags); | ||
64 | prev = *(u64 *)ptr; | ||
65 | if (prev == old) | ||
66 | *(u64 *)ptr = new; | ||
67 | local_irq_restore(flags); | ||
68 | return prev; | ||
69 | } | ||
70 | EXPORT_SYMBOL(cmpxchg_486_u64); | ||
71 | #endif | ||
72 | |||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4868e4a951ee..f2f9ac7da25c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -140,10 +140,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | |||
140 | static int __init x86_xsave_setup(char *s) | 140 | static int __init x86_xsave_setup(char *s) |
141 | { | 141 | { |
142 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | 142 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); |
143 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
143 | return 1; | 144 | return 1; |
144 | } | 145 | } |
145 | __setup("noxsave", x86_xsave_setup); | 146 | __setup("noxsave", x86_xsave_setup); |
146 | 147 | ||
148 | static int __init x86_xsaveopt_setup(char *s) | ||
149 | { | ||
150 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
151 | return 1; | ||
152 | } | ||
153 | __setup("noxsaveopt", x86_xsaveopt_setup); | ||
154 | |||
147 | #ifdef CONFIG_X86_32 | 155 | #ifdef CONFIG_X86_32 |
148 | static int cachesize_override __cpuinitdata = -1; | 156 | static int cachesize_override __cpuinitdata = -1; |
149 | static int disable_x86_serial_nr __cpuinitdata = 1; | 157 | static int disable_x86_serial_nr __cpuinitdata = 1; |
@@ -537,7 +545,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) | |||
537 | } | 545 | } |
538 | } | 546 | } |
539 | 547 | ||
540 | static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | 548 | void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) |
541 | { | 549 | { |
542 | u32 tfms, xlvl; | 550 | u32 tfms, xlvl; |
543 | u32 ebx; | 551 | u32 ebx; |
@@ -551,6 +559,16 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |||
551 | c->x86_capability[4] = excap; | 559 | c->x86_capability[4] = excap; |
552 | } | 560 | } |
553 | 561 | ||
562 | /* Additional Intel-defined flags: level 0x00000007 */ | ||
563 | if (c->cpuid_level >= 0x00000007) { | ||
564 | u32 eax, ebx, ecx, edx; | ||
565 | |||
566 | cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); | ||
567 | |||
568 | if (eax > 0) | ||
569 | c->x86_capability[9] = ebx; | ||
570 | } | ||
571 | |||
554 | /* AMD-defined flags: level 0x80000001 */ | 572 | /* AMD-defined flags: level 0x80000001 */ |
555 | xlvl = cpuid_eax(0x80000000); | 573 | xlvl = cpuid_eax(0x80000000); |
556 | c->extended_cpuid_level = xlvl; | 574 | c->extended_cpuid_level = xlvl; |
@@ -576,6 +594,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |||
576 | if (c->extended_cpuid_level >= 0x80000007) | 594 | if (c->extended_cpuid_level >= 0x80000007) |
577 | c->x86_power = cpuid_edx(0x80000007); | 595 | c->x86_power = cpuid_edx(0x80000007); |
578 | 596 | ||
597 | init_scattered_cpuid_features(c); | ||
579 | } | 598 | } |
580 | 599 | ||
581 | static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) | 600 | static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) |
@@ -731,7 +750,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
731 | 750 | ||
732 | get_model_name(c); /* Default name */ | 751 | get_model_name(c); /* Default name */ |
733 | 752 | ||
734 | init_scattered_cpuid_features(c); | ||
735 | detect_nopl(c); | 753 | detect_nopl(c); |
736 | } | 754 | } |
737 | 755 | ||
@@ -1084,6 +1102,20 @@ static void clear_all_debug_regs(void) | |||
1084 | } | 1102 | } |
1085 | } | 1103 | } |
1086 | 1104 | ||
1105 | #ifdef CONFIG_KGDB | ||
1106 | /* | ||
1107 | * Restore debug regs if using kgdbwait and you have a kernel debugger | ||
1108 | * connection established. | ||
1109 | */ | ||
1110 | static void dbg_restore_debug_regs(void) | ||
1111 | { | ||
1112 | if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) | ||
1113 | arch_kgdb_ops.correct_hw_break(); | ||
1114 | } | ||
1115 | #else /* ! CONFIG_KGDB */ | ||
1116 | #define dbg_restore_debug_regs() | ||
1117 | #endif /* ! CONFIG_KGDB */ | ||
1118 | |||
1087 | /* | 1119 | /* |
1088 | * cpu_init() initializes state that is per-CPU. Some data is already | 1120 | * cpu_init() initializes state that is per-CPU. Some data is already |
1089 | * initialized (naturally) in the bootstrap process, such as the GDT | 1121 | * initialized (naturally) in the bootstrap process, such as the GDT |
@@ -1107,9 +1139,9 @@ void __cpuinit cpu_init(void) | |||
1107 | oist = &per_cpu(orig_ist, cpu); | 1139 | oist = &per_cpu(orig_ist, cpu); |
1108 | 1140 | ||
1109 | #ifdef CONFIG_NUMA | 1141 | #ifdef CONFIG_NUMA |
1110 | if (cpu != 0 && percpu_read(node_number) == 0 && | 1142 | if (cpu != 0 && percpu_read(numa_node) == 0 && |
1111 | cpu_to_node(cpu) != NUMA_NO_NODE) | 1143 | early_cpu_to_node(cpu) != NUMA_NO_NODE) |
1112 | percpu_write(node_number, cpu_to_node(cpu)); | 1144 | set_numa_node(early_cpu_to_node(cpu)); |
1113 | #endif | 1145 | #endif |
1114 | 1146 | ||
1115 | me = current; | 1147 | me = current; |
@@ -1174,20 +1206,11 @@ void __cpuinit cpu_init(void) | |||
1174 | load_TR_desc(); | 1206 | load_TR_desc(); |
1175 | load_LDT(&init_mm.context); | 1207 | load_LDT(&init_mm.context); |
1176 | 1208 | ||
1177 | #ifdef CONFIG_KGDB | 1209 | clear_all_debug_regs(); |
1178 | /* | 1210 | dbg_restore_debug_regs(); |
1179 | * If the kgdb is connected no debug regs should be altered. This | ||
1180 | * is only applicable when KGDB and a KGDB I/O module are built | ||
1181 | * into the kernel and you are using early debugging with | ||
1182 | * kgdbwait. KGDB will control the kernel HW breakpoint registers. | ||
1183 | */ | ||
1184 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | ||
1185 | arch_kgdb_ops.correct_hw_break(); | ||
1186 | else | ||
1187 | #endif | ||
1188 | clear_all_debug_regs(); | ||
1189 | 1211 | ||
1190 | fpu_init(); | 1212 | fpu_init(); |
1213 | xsave_init(); | ||
1191 | 1214 | ||
1192 | raw_local_save_flags(kernel_eflags); | 1215 | raw_local_save_flags(kernel_eflags); |
1193 | 1216 | ||
@@ -1239,23 +1262,16 @@ void __cpuinit cpu_init(void) | |||
1239 | #endif | 1262 | #endif |
1240 | 1263 | ||
1241 | clear_all_debug_regs(); | 1264 | clear_all_debug_regs(); |
1265 | dbg_restore_debug_regs(); | ||
1242 | 1266 | ||
1243 | /* | 1267 | /* |
1244 | * Force FPU initialization: | 1268 | * Force FPU initialization: |
1245 | */ | 1269 | */ |
1246 | if (cpu_has_xsave) | 1270 | current_thread_info()->status = 0; |
1247 | current_thread_info()->status = TS_XSAVE; | ||
1248 | else | ||
1249 | current_thread_info()->status = 0; | ||
1250 | clear_used_math(); | 1271 | clear_used_math(); |
1251 | mxcsr_feature_mask_init(); | 1272 | mxcsr_feature_mask_init(); |
1252 | 1273 | ||
1253 | /* | 1274 | fpu_init(); |
1254 | * Boot processor to setup the FP and extended state context info. | ||
1255 | */ | ||
1256 | if (smp_processor_id() == boot_cpu_id) | ||
1257 | init_thread_xstate(); | ||
1258 | |||
1259 | xsave_init(); | 1275 | xsave_init(); |
1260 | } | 1276 | } |
1261 | #endif | 1277 | #endif |
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 3624e8a0f71b..f668bb1f7d43 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
@@ -33,5 +33,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], | |||
33 | *const __x86_cpu_dev_end[]; | 33 | *const __x86_cpu_dev_end[]; |
34 | 34 | ||
35 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); | 35 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); |
36 | extern void get_cpu_cap(struct cpuinfo_x86 *c); | ||
36 | 37 | ||
37 | #endif | 38 | #endif |
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile index 1840c0a5170b..bd54bf67e6fb 100644 --- a/arch/x86/kernel/cpu/cpufreq/Makefile +++ b/arch/x86/kernel/cpu/cpufreq/Makefile | |||
@@ -2,8 +2,8 @@ | |||
2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. | 2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. |
3 | # speedstep-* is preferred over p4-clockmod. | 3 | # speedstep-* is preferred over p4-clockmod. |
4 | 4 | ||
5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o | 5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o |
6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o | 6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o |
7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o | 7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o |
8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | 8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o |
9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | 9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 459168083b77..cd8da247dda1 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -34,7 +34,6 @@ | |||
34 | #include <linux/compiler.h> | 34 | #include <linux/compiler.h> |
35 | #include <linux/dmi.h> | 35 | #include <linux/dmi.h> |
36 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
37 | #include <trace/events/power.h> | ||
38 | 37 | ||
39 | #include <linux/acpi.h> | 38 | #include <linux/acpi.h> |
40 | #include <linux/io.h> | 39 | #include <linux/io.h> |
@@ -46,6 +45,7 @@ | |||
46 | #include <asm/msr.h> | 45 | #include <asm/msr.h> |
47 | #include <asm/processor.h> | 46 | #include <asm/processor.h> |
48 | #include <asm/cpufeature.h> | 47 | #include <asm/cpufeature.h> |
48 | #include "mperf.h" | ||
49 | 49 | ||
50 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | 50 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ |
51 | "acpi-cpufreq", msg) | 51 | "acpi-cpufreq", msg) |
@@ -71,10 +71,8 @@ struct acpi_cpufreq_data { | |||
71 | 71 | ||
72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); | 72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); |
73 | 73 | ||
74 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
75 | |||
76 | /* acpi_perf_data is a pointer to percpu data. */ | 74 | /* acpi_perf_data is a pointer to percpu data. */ |
77 | static struct acpi_processor_performance *acpi_perf_data; | 75 | static struct acpi_processor_performance __percpu *acpi_perf_data; |
78 | 76 | ||
79 | static struct cpufreq_driver acpi_cpufreq_driver; | 77 | static struct cpufreq_driver acpi_cpufreq_driver; |
80 | 78 | ||
@@ -240,45 +238,6 @@ static u32 get_cur_val(const struct cpumask *mask) | |||
240 | return cmd.val; | 238 | return cmd.val; |
241 | } | 239 | } |
242 | 240 | ||
243 | /* Called via smp_call_function_single(), on the target CPU */ | ||
244 | static void read_measured_perf_ctrs(void *_cur) | ||
245 | { | ||
246 | struct aperfmperf *am = _cur; | ||
247 | |||
248 | get_aperfmperf(am); | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Return the measured active (C0) frequency on this CPU since last call | ||
253 | * to this function. | ||
254 | * Input: cpu number | ||
255 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
256 | * | ||
257 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
258 | * over a period of time, while CPU is in C0 state. | ||
259 | * IA32_MPERF counts at the rate of max advertised frequency | ||
260 | * IA32_APERF counts at the rate of actual CPU frequency | ||
261 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
262 | * no meaning should be associated with absolute values of these MSRs. | ||
263 | */ | ||
264 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, | ||
265 | unsigned int cpu) | ||
266 | { | ||
267 | struct aperfmperf perf; | ||
268 | unsigned long ratio; | ||
269 | unsigned int retval; | ||
270 | |||
271 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
272 | return 0; | ||
273 | |||
274 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
275 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
276 | |||
277 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
278 | |||
279 | return retval; | ||
280 | } | ||
281 | |||
282 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | 241 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) |
283 | { | 242 | { |
284 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); | 243 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); |
@@ -364,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
364 | } | 323 | } |
365 | } | 324 | } |
366 | 325 | ||
367 | trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency); | ||
368 | |||
369 | switch (data->cpu_feature) { | 326 | switch (data->cpu_feature) { |
370 | case SYSTEM_INTEL_MSR_CAPABLE: | 327 | case SYSTEM_INTEL_MSR_CAPABLE: |
371 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | 328 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; |
@@ -391,7 +348,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
391 | 348 | ||
392 | freqs.old = perf->states[perf->state].core_frequency * 1000; | 349 | freqs.old = perf->states[perf->state].core_frequency * 1000; |
393 | freqs.new = data->freq_table[next_state].frequency; | 350 | freqs.new = data->freq_table[next_state].frequency; |
394 | for_each_cpu(i, cmd.mask) { | 351 | for_each_cpu(i, policy->cpus) { |
395 | freqs.cpu = i; | 352 | freqs.cpu = i; |
396 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 353 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
397 | } | 354 | } |
@@ -407,7 +364,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
407 | } | 364 | } |
408 | } | 365 | } |
409 | 366 | ||
410 | for_each_cpu(i, cmd.mask) { | 367 | for_each_cpu(i, policy->cpus) { |
411 | freqs.cpu = i; | 368 | freqs.cpu = i; |
412 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 369 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
413 | } | 370 | } |
@@ -702,7 +659,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
702 | 659 | ||
703 | /* Check for APERF/MPERF support in hardware */ | 660 | /* Check for APERF/MPERF support in hardware */ |
704 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | 661 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) |
705 | acpi_cpufreq_driver.getavg = get_measured_perf; | 662 | acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf; |
706 | 663 | ||
707 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); | 664 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); |
708 | for (i = 0; i < perf->state_count; i++) | 665 | for (i = 0; i < perf->state_count; i++) |
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index 16e3483be9e3..32974cf84232 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | |||
@@ -169,12 +169,9 @@ static int gx_freq_mult[16] = { | |||
169 | * Low Level chipset interface * | 169 | * Low Level chipset interface * |
170 | ****************************************************************/ | 170 | ****************************************************************/ |
171 | static struct pci_device_id gx_chipset_tbl[] __initdata = { | 171 | static struct pci_device_id gx_chipset_tbl[] __initdata = { |
172 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, | 172 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY), }, |
173 | PCI_ANY_ID, PCI_ANY_ID }, | 173 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), }, |
174 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, | 174 | { PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), }, |
175 | PCI_ANY_ID, PCI_ANY_ID }, | ||
176 | { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, | ||
177 | PCI_ANY_ID, PCI_ANY_ID }, | ||
178 | { 0, }, | 175 | { 0, }, |
179 | }; | 176 | }; |
180 | 177 | ||
@@ -199,7 +196,7 @@ static __init struct pci_dev *gx_detect_chipset(void) | |||
199 | } | 196 | } |
200 | 197 | ||
201 | /* detect which companion chip is used */ | 198 | /* detect which companion chip is used */ |
202 | while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { | 199 | for_each_pci_dev(gx_pci) { |
203 | if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) | 200 | if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) |
204 | return gx_pci; | 201 | return gx_pci; |
205 | } | 202 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 7e7eea4f8261..03162dac6271 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c | |||
@@ -426,7 +426,7 @@ static int guess_fsb(int mult) | |||
426 | } | 426 | } |
427 | 427 | ||
428 | 428 | ||
429 | static int __init longhaul_get_ranges(void) | 429 | static int __cpuinit longhaul_get_ranges(void) |
430 | { | 430 | { |
431 | unsigned int i, j, k = 0; | 431 | unsigned int i, j, k = 0; |
432 | unsigned int ratio; | 432 | unsigned int ratio; |
@@ -530,7 +530,7 @@ static int __init longhaul_get_ranges(void) | |||
530 | } | 530 | } |
531 | 531 | ||
532 | 532 | ||
533 | static void __init longhaul_setup_voltagescaling(void) | 533 | static void __cpuinit longhaul_setup_voltagescaling(void) |
534 | { | 534 | { |
535 | union msr_longhaul longhaul; | 535 | union msr_longhaul longhaul; |
536 | struct mV_pos minvid, maxvid, vid; | 536 | struct mV_pos minvid, maxvid, vid; |
@@ -784,7 +784,7 @@ static int longhaul_setup_southbridge(void) | |||
784 | return 0; | 784 | return 0; |
785 | } | 785 | } |
786 | 786 | ||
787 | static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | 787 | static int __cpuinit longhaul_cpu_init(struct cpufreq_policy *policy) |
788 | { | 788 | { |
789 | struct cpuinfo_x86 *c = &cpu_data(0); | 789 | struct cpuinfo_x86 *c = &cpu_data(0); |
790 | char *cpuname = NULL; | 790 | char *cpuname = NULL; |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.h b/arch/x86/kernel/cpu/cpufreq/longhaul.h index e2360a469f79..cbf48fbca881 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.h +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.h | |||
@@ -56,7 +56,7 @@ union msr_longhaul { | |||
56 | /* | 56 | /* |
57 | * VIA C3 Samuel 1 & Samuel 2 (stepping 0) | 57 | * VIA C3 Samuel 1 & Samuel 2 (stepping 0) |
58 | */ | 58 | */ |
59 | static const int __initdata samuel1_mults[16] = { | 59 | static const int __cpuinitdata samuel1_mults[16] = { |
60 | -1, /* 0000 -> RESERVED */ | 60 | -1, /* 0000 -> RESERVED */ |
61 | 30, /* 0001 -> 3.0x */ | 61 | 30, /* 0001 -> 3.0x */ |
62 | 40, /* 0010 -> 4.0x */ | 62 | 40, /* 0010 -> 4.0x */ |
@@ -75,7 +75,7 @@ static const int __initdata samuel1_mults[16] = { | |||
75 | -1, /* 1111 -> RESERVED */ | 75 | -1, /* 1111 -> RESERVED */ |
76 | }; | 76 | }; |
77 | 77 | ||
78 | static const int __initdata samuel1_eblcr[16] = { | 78 | static const int __cpuinitdata samuel1_eblcr[16] = { |
79 | 50, /* 0000 -> RESERVED */ | 79 | 50, /* 0000 -> RESERVED */ |
80 | 30, /* 0001 -> 3.0x */ | 80 | 30, /* 0001 -> 3.0x */ |
81 | 40, /* 0010 -> 4.0x */ | 81 | 40, /* 0010 -> 4.0x */ |
@@ -97,7 +97,7 @@ static const int __initdata samuel1_eblcr[16] = { | |||
97 | /* | 97 | /* |
98 | * VIA C3 Samuel2 Stepping 1->15 | 98 | * VIA C3 Samuel2 Stepping 1->15 |
99 | */ | 99 | */ |
100 | static const int __initdata samuel2_eblcr[16] = { | 100 | static const int __cpuinitdata samuel2_eblcr[16] = { |
101 | 50, /* 0000 -> 5.0x */ | 101 | 50, /* 0000 -> 5.0x */ |
102 | 30, /* 0001 -> 3.0x */ | 102 | 30, /* 0001 -> 3.0x */ |
103 | 40, /* 0010 -> 4.0x */ | 103 | 40, /* 0010 -> 4.0x */ |
@@ -119,7 +119,7 @@ static const int __initdata samuel2_eblcr[16] = { | |||
119 | /* | 119 | /* |
120 | * VIA C3 Ezra | 120 | * VIA C3 Ezra |
121 | */ | 121 | */ |
122 | static const int __initdata ezra_mults[16] = { | 122 | static const int __cpuinitdata ezra_mults[16] = { |
123 | 100, /* 0000 -> 10.0x */ | 123 | 100, /* 0000 -> 10.0x */ |
124 | 30, /* 0001 -> 3.0x */ | 124 | 30, /* 0001 -> 3.0x */ |
125 | 40, /* 0010 -> 4.0x */ | 125 | 40, /* 0010 -> 4.0x */ |
@@ -138,7 +138,7 @@ static const int __initdata ezra_mults[16] = { | |||
138 | 120, /* 1111 -> 12.0x */ | 138 | 120, /* 1111 -> 12.0x */ |
139 | }; | 139 | }; |
140 | 140 | ||
141 | static const int __initdata ezra_eblcr[16] = { | 141 | static const int __cpuinitdata ezra_eblcr[16] = { |
142 | 50, /* 0000 -> 5.0x */ | 142 | 50, /* 0000 -> 5.0x */ |
143 | 30, /* 0001 -> 3.0x */ | 143 | 30, /* 0001 -> 3.0x */ |
144 | 40, /* 0010 -> 4.0x */ | 144 | 40, /* 0010 -> 4.0x */ |
@@ -160,7 +160,7 @@ static const int __initdata ezra_eblcr[16] = { | |||
160 | /* | 160 | /* |
161 | * VIA C3 (Ezra-T) [C5M]. | 161 | * VIA C3 (Ezra-T) [C5M]. |
162 | */ | 162 | */ |
163 | static const int __initdata ezrat_mults[32] = { | 163 | static const int __cpuinitdata ezrat_mults[32] = { |
164 | 100, /* 0000 -> 10.0x */ | 164 | 100, /* 0000 -> 10.0x */ |
165 | 30, /* 0001 -> 3.0x */ | 165 | 30, /* 0001 -> 3.0x */ |
166 | 40, /* 0010 -> 4.0x */ | 166 | 40, /* 0010 -> 4.0x */ |
@@ -196,7 +196,7 @@ static const int __initdata ezrat_mults[32] = { | |||
196 | -1, /* 1111 -> RESERVED (12.0x) */ | 196 | -1, /* 1111 -> RESERVED (12.0x) */ |
197 | }; | 197 | }; |
198 | 198 | ||
199 | static const int __initdata ezrat_eblcr[32] = { | 199 | static const int __cpuinitdata ezrat_eblcr[32] = { |
200 | 50, /* 0000 -> 5.0x */ | 200 | 50, /* 0000 -> 5.0x */ |
201 | 30, /* 0001 -> 3.0x */ | 201 | 30, /* 0001 -> 3.0x */ |
202 | 40, /* 0010 -> 4.0x */ | 202 | 40, /* 0010 -> 4.0x */ |
@@ -235,7 +235,7 @@ static const int __initdata ezrat_eblcr[32] = { | |||
235 | /* | 235 | /* |
236 | * VIA C3 Nehemiah */ | 236 | * VIA C3 Nehemiah */ |
237 | 237 | ||
238 | static const int __initdata nehemiah_mults[32] = { | 238 | static const int __cpuinitdata nehemiah_mults[32] = { |
239 | 100, /* 0000 -> 10.0x */ | 239 | 100, /* 0000 -> 10.0x */ |
240 | -1, /* 0001 -> 16.0x */ | 240 | -1, /* 0001 -> 16.0x */ |
241 | 40, /* 0010 -> 4.0x */ | 241 | 40, /* 0010 -> 4.0x */ |
@@ -270,7 +270,7 @@ static const int __initdata nehemiah_mults[32] = { | |||
270 | -1, /* 1111 -> 12.0x */ | 270 | -1, /* 1111 -> 12.0x */ |
271 | }; | 271 | }; |
272 | 272 | ||
273 | static const int __initdata nehemiah_eblcr[32] = { | 273 | static const int __cpuinitdata nehemiah_eblcr[32] = { |
274 | 50, /* 0000 -> 5.0x */ | 274 | 50, /* 0000 -> 5.0x */ |
275 | 160, /* 0001 -> 16.0x */ | 275 | 160, /* 0001 -> 16.0x */ |
276 | 40, /* 0010 -> 4.0x */ | 276 | 40, /* 0010 -> 4.0x */ |
@@ -315,7 +315,7 @@ struct mV_pos { | |||
315 | unsigned short pos; | 315 | unsigned short pos; |
316 | }; | 316 | }; |
317 | 317 | ||
318 | static const struct mV_pos __initdata vrm85_mV[32] = { | 318 | static const struct mV_pos __cpuinitdata vrm85_mV[32] = { |
319 | {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, | 319 | {1250, 8}, {1200, 6}, {1150, 4}, {1100, 2}, |
320 | {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, | 320 | {1050, 0}, {1800, 30}, {1750, 28}, {1700, 26}, |
321 | {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, | 321 | {1650, 24}, {1600, 22}, {1550, 20}, {1500, 18}, |
@@ -326,14 +326,14 @@ static const struct mV_pos __initdata vrm85_mV[32] = { | |||
326 | {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} | 326 | {1475, 17}, {1425, 15}, {1375, 13}, {1325, 11} |
327 | }; | 327 | }; |
328 | 328 | ||
329 | static const unsigned char __initdata mV_vrm85[32] = { | 329 | static const unsigned char __cpuinitdata mV_vrm85[32] = { |
330 | 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, | 330 | 0x04, 0x14, 0x03, 0x13, 0x02, 0x12, 0x01, 0x11, |
331 | 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, | 331 | 0x00, 0x10, 0x0f, 0x1f, 0x0e, 0x1e, 0x0d, 0x1d, |
332 | 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, | 332 | 0x0c, 0x1c, 0x0b, 0x1b, 0x0a, 0x1a, 0x09, 0x19, |
333 | 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 | 333 | 0x08, 0x18, 0x07, 0x17, 0x06, 0x16, 0x05, 0x15 |
334 | }; | 334 | }; |
335 | 335 | ||
336 | static const struct mV_pos __initdata mobilevrm_mV[32] = { | 336 | static const struct mV_pos __cpuinitdata mobilevrm_mV[32] = { |
337 | {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, | 337 | {1750, 31}, {1700, 30}, {1650, 29}, {1600, 28}, |
338 | {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, | 338 | {1550, 27}, {1500, 26}, {1450, 25}, {1400, 24}, |
339 | {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, | 339 | {1350, 23}, {1300, 22}, {1250, 21}, {1200, 20}, |
@@ -344,7 +344,7 @@ static const struct mV_pos __initdata mobilevrm_mV[32] = { | |||
344 | {675, 3}, {650, 2}, {625, 1}, {600, 0} | 344 | {675, 3}, {650, 2}, {625, 1}, {600, 0} |
345 | }; | 345 | }; |
346 | 346 | ||
347 | static const unsigned char __initdata mV_mobilevrm[32] = { | 347 | static const unsigned char __cpuinitdata mV_mobilevrm[32] = { |
348 | 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, | 348 | 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, |
349 | 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, | 349 | 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, |
350 | 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, | 350 | 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, |
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index e7b559d74c52..fc09f142d94d 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c | |||
@@ -165,8 +165,8 @@ static unsigned int longrun_get(unsigned int cpu) | |||
165 | * TMTA rules: | 165 | * TMTA rules: |
166 | * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) | 166 | * performance_pctg = (target_freq - low_freq)/(high_freq - low_freq) |
167 | */ | 167 | */ |
168 | static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, | 168 | static unsigned int __cpuinit longrun_determine_freqs(unsigned int *low_freq, |
169 | unsigned int *high_freq) | 169 | unsigned int *high_freq) |
170 | { | 170 | { |
171 | u32 msr_lo, msr_hi; | 171 | u32 msr_lo, msr_hi; |
172 | u32 save_lo, save_hi; | 172 | u32 save_lo, save_hi; |
@@ -258,7 +258,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, | |||
258 | } | 258 | } |
259 | 259 | ||
260 | 260 | ||
261 | static int __init longrun_cpu_init(struct cpufreq_policy *policy) | 261 | static int __cpuinit longrun_cpu_init(struct cpufreq_policy *policy) |
262 | { | 262 | { |
263 | int result = 0; | 263 | int result = 0; |
264 | 264 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c new file mode 100644 index 000000000000..911e193018ae --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/mperf.c | |||
@@ -0,0 +1,51 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/smp.h> | ||
3 | #include <linux/module.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/cpufreq.h> | ||
6 | #include <linux/slab.h> | ||
7 | |||
8 | #include "mperf.h" | ||
9 | |||
10 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
11 | |||
12 | /* Called via smp_call_function_single(), on the target CPU */ | ||
13 | static void read_measured_perf_ctrs(void *_cur) | ||
14 | { | ||
15 | struct aperfmperf *am = _cur; | ||
16 | |||
17 | get_aperfmperf(am); | ||
18 | } | ||
19 | |||
20 | /* | ||
21 | * Return the measured active (C0) frequency on this CPU since last call | ||
22 | * to this function. | ||
23 | * Input: cpu number | ||
24 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
25 | * | ||
26 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
27 | * over a period of time, while CPU is in C0 state. | ||
28 | * IA32_MPERF counts at the rate of max advertised frequency | ||
29 | * IA32_APERF counts at the rate of actual CPU frequency | ||
30 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
31 | * no meaning should be associated with absolute values of these MSRs. | ||
32 | */ | ||
33 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
34 | unsigned int cpu) | ||
35 | { | ||
36 | struct aperfmperf perf; | ||
37 | unsigned long ratio; | ||
38 | unsigned int retval; | ||
39 | |||
40 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
41 | return 0; | ||
42 | |||
43 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
44 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
45 | |||
46 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
47 | |||
48 | return retval; | ||
49 | } | ||
50 | EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf); | ||
51 | MODULE_LICENSE("GPL"); | ||
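
The comment block in mperf.c describes the APERF/MPERF ratio; the fixed-point arithmetic it implies can be checked with a small standalone user-space sketch (counter deltas are invented; the shift of 10 mirrors APERFMPERF_SHIFT from the x86 headers of this era):

```c
#include <stdint.h>
#include <stdio.h>

#define SHIFT 10  /* mirrors APERFMPERF_SHIFT: ratio kept as 10-bit fixed point */

int main(void)
{
	/* invented counter deltas sampled over some interval spent in C0 */
	uint64_t aperf_delta = 1800000000ULL;  /* counts at actual frequency */
	uint64_t mperf_delta = 2400000000ULL;  /* counts at max advertised frequency */
	unsigned int max_khz  = 2400000;       /* advertised max frequency in kHz */

	uint64_t ratio = (aperf_delta << SHIFT) / mperf_delta;
	unsigned int avg_khz = (unsigned int)((max_khz * ratio) >> SHIFT);

	printf("average C0 frequency: %u kHz\n", avg_khz); /* ~1800000 kHz */
	return 0;
}
```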
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h new file mode 100644 index 000000000000..5dbf2950dc22 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/mperf.h | |||
@@ -0,0 +1,9 @@ | |||
1 | /* | ||
2 | * (c) 2010 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | */ | ||
7 | |||
8 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
9 | unsigned int cpu); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 7b8a8ba67b07..bd1cac747f67 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
@@ -178,13 +178,8 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | |||
178 | } | 178 | } |
179 | } | 179 | } |
180 | 180 | ||
181 | if (c->x86 != 0xF) { | 181 | if (c->x86 != 0xF) |
182 | if (!cpu_has(c, X86_FEATURE_EST)) | ||
183 | printk(KERN_WARNING PFX "Unknown CPU. " | ||
184 | "Please send an e-mail to " | ||
185 | "<cpufreq@vger.kernel.org>\n"); | ||
186 | return 0; | 182 | return 0; |
187 | } | ||
188 | 183 | ||
189 | /* on P-4s, the TSC runs with constant frequency independent whether | 184 | /* on P-4s, the TSC runs with constant frequency independent whether |
190 | * throttling is active or not. */ | 185 | * throttling is active or not. */ |
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index ce7cde713e71..4f6f679f2799 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | |||
@@ -110,7 +110,7 @@ struct pcc_cpu { | |||
110 | u32 output_offset; | 110 | u32 output_offset; |
111 | }; | 111 | }; |
112 | 112 | ||
113 | static struct pcc_cpu *pcc_cpu_info; | 113 | static struct pcc_cpu __percpu *pcc_cpu_info; |
114 | 114 | ||
115 | static int pcc_cpufreq_verify(struct cpufreq_policy *policy) | 115 | static int pcc_cpufreq_verify(struct cpufreq_policy *policy) |
116 | { | 116 | { |
@@ -397,13 +397,17 @@ static int __init pcc_cpufreq_probe(void) | |||
397 | struct pcc_memory_resource *mem_resource; | 397 | struct pcc_memory_resource *mem_resource; |
398 | struct pcc_register_resource *reg_resource; | 398 | struct pcc_register_resource *reg_resource; |
399 | union acpi_object *out_obj, *member; | 399 | union acpi_object *out_obj, *member; |
400 | acpi_handle handle, osc_handle; | 400 | acpi_handle handle, osc_handle, pcch_handle; |
401 | int ret = 0; | 401 | int ret = 0; |
402 | 402 | ||
403 | status = acpi_get_handle(NULL, "\\_SB", &handle); | 403 | status = acpi_get_handle(NULL, "\\_SB", &handle); |
404 | if (ACPI_FAILURE(status)) | 404 | if (ACPI_FAILURE(status)) |
405 | return -ENODEV; | 405 | return -ENODEV; |
406 | 406 | ||
407 | status = acpi_get_handle(handle, "PCCH", &pcch_handle); | ||
408 | if (ACPI_FAILURE(status)) | ||
409 | return -ENODEV; | ||
410 | |||
407 | status = acpi_get_handle(handle, "_OSC", &osc_handle); | 411 | status = acpi_get_handle(handle, "_OSC", &osc_handle); |
408 | if (ACPI_SUCCESS(status)) { | 412 | if (ACPI_SUCCESS(status)) { |
409 | ret = pcc_cpufreq_do_osc(&osc_handle); | 413 | ret = pcc_cpufreq_do_osc(&osc_handle); |
@@ -543,13 +547,13 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
543 | 547 | ||
544 | if (!pcch_virt_addr) { | 548 | if (!pcch_virt_addr) { |
545 | result = -1; | 549 | result = -1; |
546 | goto pcch_null; | 550 | goto out; |
547 | } | 551 | } |
548 | 552 | ||
549 | result = pcc_get_offset(cpu); | 553 | result = pcc_get_offset(cpu); |
550 | if (result) { | 554 | if (result) { |
551 | dprintk("init: PCCP evaluation failed\n"); | 555 | dprintk("init: PCCP evaluation failed\n"); |
552 | goto free; | 556 | goto out; |
553 | } | 557 | } |
554 | 558 | ||
555 | policy->max = policy->cpuinfo.max_freq = | 559 | policy->max = policy->cpuinfo.max_freq = |
@@ -558,14 +562,15 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
558 | ioread32(&pcch_hdr->minimum_frequency) * 1000; | 562 | ioread32(&pcch_hdr->minimum_frequency) * 1000; |
559 | policy->cur = pcc_get_freq(cpu); | 563 | policy->cur = pcc_get_freq(cpu); |
560 | 564 | ||
565 | if (!policy->cur) { | ||
566 | dprintk("init: Unable to get current CPU frequency\n"); | ||
567 | result = -EINVAL; | ||
568 | goto out; | ||
569 | } | ||
570 | |||
561 | dprintk("init: policy->max is %d, policy->min is %d\n", | 571 | dprintk("init: policy->max is %d, policy->min is %d\n", |
562 | policy->max, policy->min); | 572 | policy->max, policy->min); |
563 | 573 | out: | |
564 | return 0; | ||
565 | free: | ||
566 | pcc_clear_mapping(); | ||
567 | free_percpu(pcc_cpu_info); | ||
568 | pcch_null: | ||
569 | return result; | 574 | return result; |
570 | } | 575 | } |
571 | 576 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 9a97116f89e5..4a45fd6e41ba 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c | |||
@@ -569,7 +569,7 @@ static int powernow_verify(struct cpufreq_policy *policy) | |||
569 | * We will then get the same kind of behaviour already tested under | 569 | * We will then get the same kind of behaviour already tested under |
570 | * the "well-known" other OS. | 570 | * the "well-known" other OS. |
571 | */ | 571 | */ |
572 | static int __init fixup_sgtc(void) | 572 | static int __cpuinit fixup_sgtc(void) |
573 | { | 573 | { |
574 | unsigned int sgtc; | 574 | unsigned int sgtc; |
575 | unsigned int m; | 575 | unsigned int m; |
@@ -603,7 +603,7 @@ static unsigned int powernow_get(unsigned int cpu) | |||
603 | } | 603 | } |
604 | 604 | ||
605 | 605 | ||
606 | static int __init acer_cpufreq_pst(const struct dmi_system_id *d) | 606 | static int __cpuinit acer_cpufreq_pst(const struct dmi_system_id *d) |
607 | { | 607 | { |
608 | printk(KERN_WARNING PFX | 608 | printk(KERN_WARNING PFX |
609 | "%s laptop with broken PST tables in BIOS detected.\n", | 609 | "%s laptop with broken PST tables in BIOS detected.\n", |
@@ -621,7 +621,7 @@ static int __init acer_cpufreq_pst(const struct dmi_system_id *d) | |||
621 | * A BIOS update is all that can save them. | 621 | * A BIOS update is all that can save them. |
622 | * Mention this, and disable cpufreq. | 622 | * Mention this, and disable cpufreq. |
623 | */ | 623 | */ |
624 | static struct dmi_system_id __initdata powernow_dmi_table[] = { | 624 | static struct dmi_system_id __cpuinitdata powernow_dmi_table[] = { |
625 | { | 625 | { |
626 | .callback = acer_cpufreq_pst, | 626 | .callback = acer_cpufreq_pst, |
627 | .ident = "Acer Aspire", | 627 | .ident = "Acer Aspire", |
@@ -633,7 +633,7 @@ static struct dmi_system_id __initdata powernow_dmi_table[] = { | |||
633 | { } | 633 | { } |
634 | }; | 634 | }; |
635 | 635 | ||
636 | static int __init powernow_cpu_init(struct cpufreq_policy *policy) | 636 | static int __cpuinit powernow_cpu_init(struct cpufreq_policy *policy) |
637 | { | 637 | { |
638 | union msr_fidvidstatus fidvidstatus; | 638 | union msr_fidvidstatus fidvidstatus; |
639 | int result; | 639 | int result; |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index b6215b9798e2..491977baf6c0 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -1,6 +1,5 @@ | |||
1 | |||
2 | /* | 1 | /* |
3 | * (c) 2003-2006 Advanced Micro Devices, Inc. | 2 | * (c) 2003-2010 Advanced Micro Devices, Inc. |
4 | * Your use of this code is subject to the terms and conditions of the | 3 | * Your use of this code is subject to the terms and conditions of the |
5 | * GNU general public license version 2. See "COPYING" or | 4 | * GNU general public license version 2. See "COPYING" or |
6 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
@@ -10,7 +9,7 @@ | |||
10 | * Based on the powernow-k7.c module written by Dave Jones. | 9 | * Based on the powernow-k7.c module written by Dave Jones. |
11 | * (C) 2003 Dave Jones on behalf of SuSE Labs | 10 | * (C) 2003 Dave Jones on behalf of SuSE Labs |
12 | * (C) 2004 Dominik Brodowski <linux@brodo.de> | 11 | * (C) 2004 Dominik Brodowski <linux@brodo.de> |
13 | * (C) 2004 Pavel Machek <pavel@suse.cz> | 12 | * (C) 2004 Pavel Machek <pavel@ucw.cz> |
14 | * Licensed under the terms of the GNU GPL License version 2. | 13 | * Licensed under the terms of the GNU GPL License version 2. |
15 | * Based upon datasheets & sample CPUs kindly provided by AMD. | 14 | * Based upon datasheets & sample CPUs kindly provided by AMD. |
16 | * | 15 | * |
@@ -46,6 +45,7 @@ | |||
46 | #define PFX "powernow-k8: " | 45 | #define PFX "powernow-k8: " |
47 | #define VERSION "version 2.20.00" | 46 | #define VERSION "version 2.20.00" |
48 | #include "powernow-k8.h" | 47 | #include "powernow-k8.h" |
48 | #include "mperf.h" | ||
49 | 49 | ||
50 | /* serialize freq changes */ | 50 | /* serialize freq changes */ |
51 | static DEFINE_MUTEX(fidvid_mutex); | 51 | static DEFINE_MUTEX(fidvid_mutex); |
@@ -54,6 +54,12 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); | |||
54 | 54 | ||
55 | static int cpu_family = CPU_OPTERON; | 55 | static int cpu_family = CPU_OPTERON; |
56 | 56 | ||
57 | /* core performance boost */ | ||
58 | static bool cpb_capable, cpb_enabled; | ||
59 | static struct msr __percpu *msrs; | ||
60 | |||
61 | static struct cpufreq_driver cpufreq_amd64_driver; | ||
62 | |||
57 | #ifndef CONFIG_SMP | 63 | #ifndef CONFIG_SMP |
58 | static inline const struct cpumask *cpu_core_mask(int cpu) | 64 | static inline const struct cpumask *cpu_core_mask(int cpu) |
59 | { | 65 | { |
@@ -800,6 +806,8 @@ static int find_psb_table(struct powernow_k8_data *data) | |||
800 | * www.amd.com | 806 | * www.amd.com |
801 | */ | 807 | */ |
802 | printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); | 808 | printk(KERN_ERR FW_BUG PFX "No PSB or ACPI _PSS objects\n"); |
809 | printk(KERN_ERR PFX "Make sure that your BIOS is up to date" | ||
810 | " and Cool'N'Quiet support is enabled in BIOS setup\n"); | ||
803 | return -ENODEV; | 811 | return -ENODEV; |
804 | } | 812 | } |
805 | 813 | ||
@@ -904,8 +912,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, | |||
904 | { | 912 | { |
905 | int i; | 913 | int i; |
906 | u32 hi = 0, lo = 0; | 914 | u32 hi = 0, lo = 0; |
907 | rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); | 915 | rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi); |
908 | data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; | 916 | data->max_hw_pstate = (lo & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; |
909 | 917 | ||
910 | for (i = 0; i < data->acpi_data.state_count; i++) { | 918 | for (i = 0; i < data->acpi_data.state_count; i++) { |
911 | u32 index; | 919 | u32 index; |
@@ -1017,13 +1025,12 @@ static int get_transition_latency(struct powernow_k8_data *data) | |||
1017 | } | 1025 | } |
1018 | if (max_latency == 0) { | 1026 | if (max_latency == 0) { |
1019 | /* | 1027 | /* |
1020 | * Fam 11h always returns 0 as transition latency. | 1028 | * Fam 11h and later may return 0 as transition latency. This |
1021 | * This is intended and means "very fast". While cpufreq core | 1029 | * is intended and means "very fast". While cpufreq core and |
1022 | * and governors currently can handle that gracefully, better | 1030 | * governors currently can handle that gracefully, better set it |
1023 | * set it to 1 to avoid problems in the future. | 1031 | * to 1 to avoid problems in the future. |
1024 | * For all others it's a BIOS bug. | ||
1025 | */ | 1032 | */ |
1026 | if (boot_cpu_data.x86 != 0x11) | 1033 | if (boot_cpu_data.x86 < 0x11) |
1027 | printk(KERN_ERR FW_WARN PFX "Invalid zero transition " | 1034 | printk(KERN_ERR FW_WARN PFX "Invalid zero transition " |
1028 | "latency\n"); | 1035 | "latency\n"); |
1029 | max_latency = 1; | 1036 | max_latency = 1; |
@@ -1249,6 +1256,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1249 | struct powernow_k8_data *data; | 1256 | struct powernow_k8_data *data; |
1250 | struct init_on_cpu init_on_cpu; | 1257 | struct init_on_cpu init_on_cpu; |
1251 | int rc; | 1258 | int rc; |
1259 | struct cpuinfo_x86 *c = &cpu_data(pol->cpu); | ||
1252 | 1260 | ||
1253 | if (!cpu_online(pol->cpu)) | 1261 | if (!cpu_online(pol->cpu)) |
1254 | return -ENODEV; | 1262 | return -ENODEV; |
@@ -1323,6 +1331,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1323 | return -EINVAL; | 1331 | return -EINVAL; |
1324 | } | 1332 | } |
1325 | 1333 | ||
1334 | /* Check for APERF/MPERF support in hardware */ | ||
1335 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | ||
1336 | cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf; | ||
1337 | |||
1326 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); | 1338 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); |
1327 | 1339 | ||
1328 | if (cpu_family == CPU_HW_PSTATE) | 1340 | if (cpu_family == CPU_HW_PSTATE) |
@@ -1394,8 +1406,77 @@ out: | |||
1394 | return khz; | 1406 | return khz; |
1395 | } | 1407 | } |
1396 | 1408 | ||
1409 | static void _cpb_toggle_msrs(bool t) | ||
1410 | { | ||
1411 | int cpu; | ||
1412 | |||
1413 | get_online_cpus(); | ||
1414 | |||
1415 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1416 | |||
1417 | for_each_cpu(cpu, cpu_online_mask) { | ||
1418 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
1419 | if (t) | ||
1420 | reg->l &= ~BIT(25); | ||
1421 | else | ||
1422 | reg->l |= BIT(25); | ||
1423 | } | ||
1424 | wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1425 | |||
1426 | put_online_cpus(); | ||
1427 | } | ||
1428 | |||
1429 | /* | ||
1430 | * Switch on/off core performance boosting. | ||
1431 | * | ||
1432 | * 0=disable | ||
1433 | * 1=enable. | ||
1434 | */ | ||
1435 | static void cpb_toggle(bool t) | ||
1436 | { | ||
1437 | if (!cpb_capable) | ||
1438 | return; | ||
1439 | |||
1440 | if (t && !cpb_enabled) { | ||
1441 | cpb_enabled = true; | ||
1442 | _cpb_toggle_msrs(t); | ||
1443 | printk(KERN_INFO PFX "Core Boosting enabled.\n"); | ||
1444 | } else if (!t && cpb_enabled) { | ||
1445 | cpb_enabled = false; | ||
1446 | _cpb_toggle_msrs(t); | ||
1447 | printk(KERN_INFO PFX "Core Boosting disabled.\n"); | ||
1448 | } | ||
1449 | } | ||
1450 | |||
1451 | static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, | ||
1452 | size_t count) | ||
1453 | { | ||
1454 | int ret = -EINVAL; | ||
1455 | unsigned long val = 0; | ||
1456 | |||
1457 | ret = strict_strtoul(buf, 10, &val); | ||
1458 | if (!ret && (val == 0 || val == 1) && cpb_capable) | ||
1459 | cpb_toggle(val); | ||
1460 | else | ||
1461 | return -EINVAL; | ||
1462 | |||
1463 | return count; | ||
1464 | } | ||
1465 | |||
1466 | static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) | ||
1467 | { | ||
1468 | return sprintf(buf, "%u\n", cpb_enabled); | ||
1469 | } | ||
1470 | |||
1471 | #define define_one_rw(_name) \ | ||
1472 | static struct freq_attr _name = \ | ||
1473 | __ATTR(_name, 0644, show_##_name, store_##_name) | ||
1474 | |||
1475 | define_one_rw(cpb); | ||
1476 | |||
1397 | static struct freq_attr *powernow_k8_attr[] = { | 1477 | static struct freq_attr *powernow_k8_attr[] = { |
1398 | &cpufreq_freq_attr_scaling_available_freqs, | 1478 | &cpufreq_freq_attr_scaling_available_freqs, |
1479 | &cpb, | ||
1399 | NULL, | 1480 | NULL, |
1400 | }; | 1481 | }; |
1401 | 1482 | ||
@@ -1411,10 +1492,51 @@ static struct cpufreq_driver cpufreq_amd64_driver = { | |||
1411 | .attr = powernow_k8_attr, | 1492 | .attr = powernow_k8_attr, |
1412 | }; | 1493 | }; |
1413 | 1494 | ||
1495 | /* | ||
1496 | * Clear the boost-disable flag on the CPU_DOWN path so that this cpu | ||
1497 | * cannot block the remaining ones from boosting. On the CPU_UP path we | ||
1498 | * simply keep the boost-disable flag in sync with the current global | ||
1499 | * state. | ||
1500 | */ | ||
1501 | static int cpb_notify(struct notifier_block *nb, unsigned long action, | ||
1502 | void *hcpu) | ||
1503 | { | ||
1504 | unsigned cpu = (long)hcpu; | ||
1505 | u32 lo, hi; | ||
1506 | |||
1507 | switch (action) { | ||
1508 | case CPU_UP_PREPARE: | ||
1509 | case CPU_UP_PREPARE_FROZEN: | ||
1510 | |||
1511 | if (!cpb_enabled) { | ||
1512 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
1513 | lo |= BIT(25); | ||
1514 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
1515 | } | ||
1516 | break; | ||
1517 | |||
1518 | case CPU_DOWN_PREPARE: | ||
1519 | case CPU_DOWN_PREPARE_FROZEN: | ||
1520 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
1521 | lo &= ~BIT(25); | ||
1522 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
1523 | break; | ||
1524 | |||
1525 | default: | ||
1526 | break; | ||
1527 | } | ||
1528 | |||
1529 | return NOTIFY_OK; | ||
1530 | } | ||
1531 | |||
1532 | static struct notifier_block cpb_nb = { | ||
1533 | .notifier_call = cpb_notify, | ||
1534 | }; | ||
1535 | |||
1414 | /* driver entry point for init */ | 1536 | /* driver entry point for init */ |
1415 | static int __cpuinit powernowk8_init(void) | 1537 | static int __cpuinit powernowk8_init(void) |
1416 | { | 1538 | { |
1417 | unsigned int i, supported_cpus = 0; | 1539 | unsigned int i, supported_cpus = 0, cpu; |
1418 | 1540 | ||
1419 | for_each_online_cpu(i) { | 1541 | for_each_online_cpu(i) { |
1420 | int rc; | 1542 | int rc; |
@@ -1423,15 +1545,36 @@ static int __cpuinit powernowk8_init(void) | |||
1423 | supported_cpus++; | 1545 | supported_cpus++; |
1424 | } | 1546 | } |
1425 | 1547 | ||
1426 | if (supported_cpus == num_online_cpus()) { | 1548 | if (supported_cpus != num_online_cpus()) |
1427 | printk(KERN_INFO PFX "Found %d %s " | 1549 | return -ENODEV; |
1428 | "processors (%d cpu cores) (" VERSION ")\n", | 1550 | |
1429 | num_online_nodes(), | 1551 | printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", |
1430 | boot_cpu_data.x86_model_id, supported_cpus); | 1552 | num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); |
1431 | return cpufreq_register_driver(&cpufreq_amd64_driver); | 1553 | |
1554 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
1555 | |||
1556 | cpb_capable = true; | ||
1557 | |||
1558 | register_cpu_notifier(&cpb_nb); | ||
1559 | |||
1560 | msrs = msrs_alloc(); | ||
1561 | if (!msrs) { | ||
1562 | printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); | ||
1563 | return -ENOMEM; | ||
1564 | } | ||
1565 | |||
1566 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1567 | |||
1568 | for_each_cpu(cpu, cpu_online_mask) { | ||
1569 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
1570 | cpb_enabled |= !(!!(reg->l & BIT(25))); | ||
1571 | } | ||
1572 | |||
1573 | printk(KERN_INFO PFX "Core Performance Boosting: %s.\n", | ||
1574 | (cpb_enabled ? "on" : "off")); | ||
1432 | } | 1575 | } |
1433 | 1576 | ||
1434 | return -ENODEV; | 1577 | return cpufreq_register_driver(&cpufreq_amd64_driver); |
1435 | } | 1578 | } |
1436 | 1579 | ||
1437 | /* driver entry point for term */ | 1580 | /* driver entry point for term */ |
@@ -1439,6 +1582,13 @@ static void __exit powernowk8_exit(void) | |||
1439 | { | 1582 | { |
1440 | dprintk("exit\n"); | 1583 | dprintk("exit\n"); |
1441 | 1584 | ||
1585 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
1586 | msrs_free(msrs); | ||
1587 | msrs = NULL; | ||
1588 | |||
1589 | unregister_cpu_notifier(&cpb_nb); | ||
1590 | } | ||
1591 | |||
1442 | cpufreq_unregister_driver(&cpufreq_amd64_driver); | 1592 | cpufreq_unregister_driver(&cpufreq_amd64_driver); |
1443 | } | 1593 | } |
1444 | 1594 | ||
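The boost bookkeeping above hinges on bit 25 of MSR_K7_HWCR: when that bit is set on a core, Core Performance Boost is disabled there, and powernowk8_init() treats boosting as globally enabled if at least one online core has the bit clear. A minimal user-space style sketch of that derivation follows; the msr_lo[] sample values are hypothetical and merely stand in for the per-CPU reads done with rdmsr_on_cpus().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CPB_DISABLE_BIT (1u << 25)	/* MSR_K7_HWCR bit 25: boost disabled */

int main(void)
{
	/* Hypothetical low 32 bits of MSR_K7_HWCR as read on each CPU. */
	uint32_t msr_lo[] = { 0x0, 0x0, CPB_DISABLE_BIT, 0x0 };
	bool cpb_enabled = false;
	unsigned int cpu;

	/* Same derivation as in powernowk8_init(): boosting counts as
	 * enabled if at least one core does not have the disable bit set. */
	for (cpu = 0; cpu < sizeof(msr_lo) / sizeof(msr_lo[0]); cpu++)
		cpb_enabled |= !(msr_lo[cpu] & CPB_DISABLE_BIT);

	printf("Core Performance Boosting: %s\n", cpb_enabled ? "on" : "off");
	return 0;
}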
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 02ce824073cb..df3529b1c02d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h | |||
@@ -5,7 +5,6 @@ | |||
5 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
6 | */ | 6 | */ |
7 | 7 | ||
8 | |||
9 | enum pstate { | 8 | enum pstate { |
10 | HW_PSTATE_INVALID = 0xff, | 9 | HW_PSTATE_INVALID = 0xff, |
11 | HW_PSTATE_0 = 0, | 10 | HW_PSTATE_0 = 0, |
@@ -55,7 +54,6 @@ struct powernow_k8_data { | |||
55 | struct cpumask *available_cores; | 54 | struct cpumask *available_cores; |
56 | }; | 55 | }; |
57 | 56 | ||
58 | |||
59 | /* processor's cpuid instruction support */ | 57 | /* processor's cpuid instruction support */ |
60 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ | 58 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ |
61 | #define CPUID_XFAM 0x0ff00000 /* extended family */ | 59 | #define CPUID_XFAM 0x0ff00000 /* extended family */ |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 08be922de33a..8095f8611f8a 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -21,37 +21,58 @@ | |||
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/module.h> | ||
24 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
25 | #include <asm/vmware.h> | ||
26 | #include <asm/hypervisor.h> | 26 | #include <asm/hypervisor.h> |
27 | 27 | ||
28 | static inline void __cpuinit | 28 | /* |
29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) | 29 | * Hypervisor detect order. This is specified explicitly here because |
30 | * some hypervisors might implement compatibility modes for other | ||
31 | * hypervisors and therefore need to be detected in specific sequence. | ||
32 | */ | ||
33 | static const __initconst struct hypervisor_x86 * const hypervisors[] = | ||
30 | { | 34 | { |
31 | if (vmware_platform()) | 35 | &x86_hyper_vmware, |
32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; | 36 | &x86_hyper_ms_hyperv, |
33 | else | 37 | #ifdef CONFIG_XEN_PVHVM |
34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | 38 | &x86_hyper_xen_hvm, |
35 | } | 39 | #endif |
40 | }; | ||
36 | 41 | ||
37 | static inline void __cpuinit | 42 | const struct hypervisor_x86 *x86_hyper; |
38 | hypervisor_set_feature_bits(struct cpuinfo_x86 *c) | 43 | EXPORT_SYMBOL(x86_hyper); |
44 | |||
45 | static inline void __init | ||
46 | detect_hypervisor_vendor(void) | ||
39 | { | 47 | { |
40 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) { | 48 | const struct hypervisor_x86 *h, * const *p; |
41 | vmware_set_feature_bits(c); | 49 | |
42 | return; | 50 | for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { |
51 | h = *p; | ||
52 | if (h->detect()) { | ||
53 | x86_hyper = h; | ||
54 | printk(KERN_INFO "Hypervisor detected: %s\n", h->name); | ||
55 | break; | ||
56 | } | ||
43 | } | 57 | } |
44 | } | 58 | } |
45 | 59 | ||
46 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) | 60 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) |
47 | { | 61 | { |
48 | detect_hypervisor_vendor(c); | 62 | if (x86_hyper && x86_hyper->set_cpu_features) |
49 | hypervisor_set_feature_bits(c); | 63 | x86_hyper->set_cpu_features(c); |
50 | } | 64 | } |
51 | 65 | ||
52 | void __init init_hypervisor_platform(void) | 66 | void __init init_hypervisor_platform(void) |
53 | { | 67 | { |
68 | |||
69 | detect_hypervisor_vendor(); | ||
70 | |||
71 | if (!x86_hyper) | ||
72 | return; | ||
73 | |||
54 | init_hypervisor(&boot_cpu_data); | 74 | init_hypervisor(&boot_cpu_data); |
55 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | 75 | |
56 | vmware_platform_setup(); | 76 | if (x86_hyper->init_platform) |
77 | x86_hyper->init_platform(); | ||
57 | } | 78 | } |
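The rewritten hypervisor.c replaces the VMware-only check with a table of descriptors tried in a fixed order; the first ->detect() callback that reports success becomes x86_hyper. Below is a small self-contained sketch of the same table-driven pattern, with made-up detect functions standing in for the real VMware/Hyper-V/Xen probes.

#include <stddef.h>
#include <stdio.h>

struct hypervisor_desc {
	const char *name;
	int (*detect)(void);
};

static int detect_vmware(void) { return 0; }	/* pretend: not found */
static int detect_hyperv(void) { return 1; }	/* pretend: found */

/* Order matters: compatibility modes mean earlier entries win. */
static const struct hypervisor_desc hypervisors[] = {
	{ "VMware",           detect_vmware },
	{ "Microsoft HyperV", detect_hyperv },
};

int main(void)
{
	const struct hypervisor_desc *found = NULL;
	size_t i;

	for (i = 0; i < sizeof(hypervisors) / sizeof(hypervisors[0]); i++) {
		if (hypervisors[i].detect()) {
			found = &hypervisors[i];
			break;
		}
	}

	if (found)
		printf("Hypervisor detected: %s\n", found->name);
	return 0;
}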
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1366c7cfd483..b4389441efbb 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <asm/processor.h> | 12 | #include <asm/processor.h> |
13 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
15 | #include <asm/ds.h> | ||
16 | #include <asm/bugs.h> | 15 | #include <asm/bugs.h> |
17 | #include <asm/cpu.h> | 16 | #include <asm/cpu.h> |
18 | 17 | ||
@@ -40,6 +39,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
40 | misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; | 39 | misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; |
41 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | 40 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); |
42 | c->cpuid_level = cpuid_eax(0); | 41 | c->cpuid_level = cpuid_eax(0); |
42 | get_cpu_cap(c); | ||
43 | } | 43 | } |
44 | } | 44 | } |
45 | 45 | ||
@@ -373,12 +373,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
373 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 373 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
374 | } | 374 | } |
375 | 375 | ||
376 | if (c->cpuid_level > 6) { | ||
377 | unsigned ecx = cpuid_ecx(6); | ||
378 | if (ecx & 0x01) | ||
379 | set_cpu_cap(c, X86_FEATURE_APERFMPERF); | ||
380 | } | ||
381 | |||
382 | if (cpu_has_xmm2) | 376 | if (cpu_has_xmm2) |
383 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | 377 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
384 | if (cpu_has_ds) { | 378 | if (cpu_has_ds) { |
@@ -388,7 +382,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
388 | set_cpu_cap(c, X86_FEATURE_BTS); | 382 | set_cpu_cap(c, X86_FEATURE_BTS); |
389 | if (!(l1 & (1<<12))) | 383 | if (!(l1 & (1<<12))) |
390 | set_cpu_cap(c, X86_FEATURE_PEBS); | 384 | set_cpu_cap(c, X86_FEATURE_PEBS); |
391 | ds_init_intel(c); | ||
392 | } | 385 | } |
393 | 386 | ||
394 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) | 387 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 94d8e475744c..3fec7d9bfd62 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -148,13 +148,19 @@ union _cpuid4_leaf_ecx { | |||
148 | u32 full; | 148 | u32 full; |
149 | }; | 149 | }; |
150 | 150 | ||
151 | struct amd_l3_cache { | ||
152 | struct pci_dev *dev; | ||
153 | bool can_disable; | ||
154 | unsigned indices; | ||
155 | u8 subcaches[4]; | ||
156 | }; | ||
157 | |||
151 | struct _cpuid4_info { | 158 | struct _cpuid4_info { |
152 | union _cpuid4_leaf_eax eax; | 159 | union _cpuid4_leaf_eax eax; |
153 | union _cpuid4_leaf_ebx ebx; | 160 | union _cpuid4_leaf_ebx ebx; |
154 | union _cpuid4_leaf_ecx ecx; | 161 | union _cpuid4_leaf_ecx ecx; |
155 | unsigned long size; | 162 | unsigned long size; |
156 | bool can_disable; | 163 | struct amd_l3_cache *l3; |
157 | unsigned int l3_indices; | ||
158 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); | 164 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); |
159 | }; | 165 | }; |
160 | 166 | ||
@@ -164,8 +170,7 @@ struct _cpuid4_info_regs { | |||
164 | union _cpuid4_leaf_ebx ebx; | 170 | union _cpuid4_leaf_ebx ebx; |
165 | union _cpuid4_leaf_ecx ecx; | 171 | union _cpuid4_leaf_ecx ecx; |
166 | unsigned long size; | 172 | unsigned long size; |
167 | bool can_disable; | 173 | struct amd_l3_cache *l3; |
168 | unsigned int l3_indices; | ||
169 | }; | 174 | }; |
170 | 175 | ||
171 | unsigned short num_cache_leaves; | 176 | unsigned short num_cache_leaves; |
@@ -302,124 +307,246 @@ struct _cache_attr { | |||
302 | }; | 307 | }; |
303 | 308 | ||
304 | #ifdef CONFIG_CPU_SUP_AMD | 309 | #ifdef CONFIG_CPU_SUP_AMD |
305 | static unsigned int __cpuinit amd_calc_l3_indices(void) | 310 | |
311 | /* | ||
312 | * L3 cache descriptors | ||
313 | */ | ||
314 | static struct amd_l3_cache **__cpuinitdata l3_caches; | ||
315 | |||
316 | static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) | ||
306 | { | 317 | { |
307 | /* | ||
308 | * We're called over smp_call_function_single() and therefore | ||
309 | * are on the correct cpu. | ||
310 | */ | ||
311 | int cpu = smp_processor_id(); | ||
312 | int node = cpu_to_node(cpu); | ||
313 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
314 | unsigned int sc0, sc1, sc2, sc3; | 318 | unsigned int sc0, sc1, sc2, sc3; |
315 | u32 val = 0; | 319 | u32 val = 0; |
316 | 320 | ||
317 | pci_read_config_dword(dev, 0x1C4, &val); | 321 | pci_read_config_dword(l3->dev, 0x1C4, &val); |
318 | 322 | ||
319 | /* calculate subcache sizes */ | 323 | /* calculate subcache sizes */ |
320 | sc0 = !(val & BIT(0)); | 324 | l3->subcaches[0] = sc0 = !(val & BIT(0)); |
321 | sc1 = !(val & BIT(4)); | 325 | l3->subcaches[1] = sc1 = !(val & BIT(4)); |
322 | sc2 = !(val & BIT(8)) + !(val & BIT(9)); | 326 | l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); |
323 | sc3 = !(val & BIT(12)) + !(val & BIT(13)); | 327 | l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); |
324 | 328 | ||
325 | return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; | 329 | l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; |
326 | } | 330 | } |
327 | 331 | ||
328 | static void __cpuinit | 332 | static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) |
329 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | ||
330 | { | 333 | { |
331 | if (index < 3) | 334 | struct amd_l3_cache *l3; |
335 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
336 | |||
337 | l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC); | ||
338 | if (!l3) { | ||
339 | printk(KERN_WARNING "Error allocating L3 struct\n"); | ||
340 | return NULL; | ||
341 | } | ||
342 | |||
343 | l3->dev = dev; | ||
344 | |||
345 | amd_calc_l3_indices(l3); | ||
346 | |||
347 | return l3; | ||
348 | } | ||
349 | |||
350 | static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, | ||
351 | int index) | ||
352 | { | ||
353 | int node; | ||
354 | |||
355 | if (boot_cpu_data.x86 != 0x10) | ||
332 | return; | 356 | return; |
333 | 357 | ||
334 | if (boot_cpu_data.x86 == 0x11) | 358 | if (index < 3) |
335 | return; | 359 | return; |
336 | 360 | ||
337 | /* see errata #382 and #388 */ | 361 | /* see errata #382 and #388 */ |
338 | if ((boot_cpu_data.x86 == 0x10) && | 362 | if (boot_cpu_data.x86_model < 0x8) |
339 | ((boot_cpu_data.x86_model < 0x8) || | ||
340 | (boot_cpu_data.x86_mask < 0x1))) | ||
341 | return; | 363 | return; |
342 | 364 | ||
365 | if ((boot_cpu_data.x86_model == 0x8 || | ||
366 | boot_cpu_data.x86_model == 0x9) | ||
367 | && | ||
368 | boot_cpu_data.x86_mask < 0x1) | ||
369 | return; | ||
370 | |||
343 | /* not in virtualized environments */ | 371 | /* not in virtualized environments */ |
344 | if (num_k8_northbridges == 0) | 372 | if (num_k8_northbridges == 0) |
345 | return; | 373 | return; |
346 | 374 | ||
347 | this_leaf->can_disable = true; | 375 | /* |
348 | this_leaf->l3_indices = amd_calc_l3_indices(); | 376 | * Strictly speaking, the amount in @size below is leaked since it is |
377 | * never freed but this is done only on shutdown so it doesn't matter. | ||
378 | */ | ||
379 | if (!l3_caches) { | ||
380 | int size = num_k8_northbridges * sizeof(struct amd_l3_cache *); | ||
381 | |||
382 | l3_caches = kzalloc(size, GFP_ATOMIC); | ||
383 | if (!l3_caches) | ||
384 | return; | ||
385 | } | ||
386 | |||
387 | node = amd_get_nb_id(smp_processor_id()); | ||
388 | |||
389 | if (!l3_caches[node]) { | ||
390 | l3_caches[node] = amd_init_l3_cache(node); | ||
391 | l3_caches[node]->can_disable = true; | ||
392 | } | ||
393 | |||
394 | WARN_ON(!l3_caches[node]); | ||
395 | |||
396 | this_leaf->l3 = l3_caches[node]; | ||
349 | } | 397 | } |
350 | 398 | ||
351 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | 399 | /* |
352 | unsigned int index) | 400 | * check whether a slot used for disabling an L3 index is occupied. |
401 | * @l3: L3 cache descriptor | ||
402 | * @slot: slot number (0..1) | ||
403 | * | ||
404 | * @returns: the disabled index if used or negative value if slot free. | ||
405 | */ | ||
406 | int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot) | ||
353 | { | 407 | { |
354 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
355 | int node = amd_get_nb_id(cpu); | ||
356 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
357 | unsigned int reg = 0; | 408 | unsigned int reg = 0; |
358 | 409 | ||
359 | if (!this_leaf->can_disable) | 410 | pci_read_config_dword(l3->dev, 0x1BC + slot * 4, ®); |
360 | return -EINVAL; | 411 | |
412 | /* check whether this slot is activated already */ | ||
413 | if (reg & (3UL << 30)) | ||
414 | return reg & 0xfff; | ||
415 | |||
416 | return -1; | ||
417 | } | ||
418 | |||
419 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | ||
420 | unsigned int slot) | ||
421 | { | ||
422 | int index; | ||
361 | 423 | ||
362 | if (!dev) | 424 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
363 | return -EINVAL; | 425 | return -EINVAL; |
364 | 426 | ||
365 | pci_read_config_dword(dev, 0x1BC + index * 4, ®); | 427 | index = amd_get_l3_disable_slot(this_leaf->l3, slot); |
366 | return sprintf(buf, "0x%08x\n", reg); | 428 | if (index >= 0) |
429 | return sprintf(buf, "%d\n", index); | ||
430 | |||
431 | return sprintf(buf, "FREE\n"); | ||
367 | } | 432 | } |
368 | 433 | ||
369 | #define SHOW_CACHE_DISABLE(index) \ | 434 | #define SHOW_CACHE_DISABLE(slot) \ |
370 | static ssize_t \ | 435 | static ssize_t \ |
371 | show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ | 436 | show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \ |
372 | { \ | 437 | { \ |
373 | return show_cache_disable(this_leaf, buf, index); \ | 438 | return show_cache_disable(this_leaf, buf, slot); \ |
374 | } | 439 | } |
375 | SHOW_CACHE_DISABLE(0) | 440 | SHOW_CACHE_DISABLE(0) |
376 | SHOW_CACHE_DISABLE(1) | 441 | SHOW_CACHE_DISABLE(1) |
377 | 442 | ||
378 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | 443 | static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, |
379 | const char *buf, size_t count, unsigned int index) | 444 | unsigned slot, unsigned long idx) |
380 | { | 445 | { |
381 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | 446 | int i; |
382 | int node = amd_get_nb_id(cpu); | 447 | |
383 | struct pci_dev *dev = node_to_k8_nb_misc(node); | 448 | idx |= BIT(30); |
384 | unsigned long val = 0; | 449 | |
450 | /* | ||
451 | * disable index in all 4 subcaches | ||
452 | */ | ||
453 | for (i = 0; i < 4; i++) { | ||
454 | u32 reg = idx | (i << 20); | ||
455 | |||
456 | if (!l3->subcaches[i]) | ||
457 | continue; | ||
458 | |||
459 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | ||
460 | |||
461 | /* | ||
462 | * We need to WBINVD on a core on the node containing the L3 | ||
463 | * cache whose indices we disable; a plain wbinvd() on an | ||
464 | * arbitrary CPU is therefore not sufficient. | ||
465 | */ | ||
466 | wbinvd_on_cpu(cpu); | ||
467 | |||
468 | reg |= BIT(31); | ||
469 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | ||
470 | } | ||
471 | } | ||
472 | |||
473 | /* | ||
474 | * disable an L3 cache index by using a disable slot | ||
475 | * | ||
476 | * @l3: L3 cache descriptor | ||
477 | * @cpu: A CPU on the node containing the L3 cache | ||
478 | * @slot: slot number (0..1) | ||
479 | * @index: index to disable | ||
480 | * | ||
481 | * @return: 0 on success, error status on failure | ||
482 | */ | ||
483 | int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot, | ||
484 | unsigned long index) | ||
485 | { | ||
486 | int ret = 0; | ||
385 | 487 | ||
386 | #define SUBCACHE_MASK (3UL << 20) | 488 | #define SUBCACHE_MASK (3UL << 20) |
387 | #define SUBCACHE_INDEX 0xfff | 489 | #define SUBCACHE_INDEX 0xfff |
388 | 490 | ||
389 | if (!this_leaf->can_disable) | 491 | /* |
492 | * check whether this slot is already used or | ||
493 | * the index is already disabled | ||
494 | */ | ||
495 | ret = amd_get_l3_disable_slot(l3, slot); | ||
496 | if (ret >= 0) | ||
390 | return -EINVAL; | 497 | return -EINVAL; |
391 | 498 | ||
499 | /* | ||
500 | * check whether the other slot has disabled the | ||
501 | * same index already | ||
502 | */ | ||
503 | if (index == amd_get_l3_disable_slot(l3, !slot)) | ||
504 | return -EINVAL; | ||
505 | |||
506 | /* do not allow writes outside of allowed bits */ | ||
507 | if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | ||
508 | ((index & SUBCACHE_INDEX) > l3->indices)) | ||
509 | return -EINVAL; | ||
510 | |||
511 | amd_l3_disable_index(l3, cpu, slot, index); | ||
512 | |||
513 | return 0; | ||
514 | } | ||
515 | |||
516 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | ||
517 | const char *buf, size_t count, | ||
518 | unsigned int slot) | ||
519 | { | ||
520 | unsigned long val = 0; | ||
521 | int cpu, err = 0; | ||
522 | |||
392 | if (!capable(CAP_SYS_ADMIN)) | 523 | if (!capable(CAP_SYS_ADMIN)) |
393 | return -EPERM; | 524 | return -EPERM; |
394 | 525 | ||
395 | if (!dev) | 526 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
396 | return -EINVAL; | 527 | return -EINVAL; |
397 | 528 | ||
398 | if (strict_strtoul(buf, 10, &val) < 0) | 529 | cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); |
399 | return -EINVAL; | ||
400 | 530 | ||
401 | /* do not allow writes outside of allowed bits */ | 531 | if (strict_strtoul(buf, 10, &val) < 0) |
402 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | ||
403 | ((val & SUBCACHE_INDEX) > this_leaf->l3_indices)) | ||
404 | return -EINVAL; | 532 | return -EINVAL; |
405 | 533 | ||
406 | val |= BIT(30); | 534 | err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val); |
407 | pci_write_config_dword(dev, 0x1BC + index * 4, val); | 535 | if (err) { |
408 | /* | 536 | if (err == -EEXIST) |
409 | * We need to WBINVD on a core on the node containing the L3 cache which | 537 | printk(KERN_WARNING "L3 disable slot %d in use!\n", |
410 | * indices we disable therefore a simple wbinvd() is not sufficient. | 538 | slot); |
411 | */ | 539 | return err; |
412 | wbinvd_on_cpu(cpu); | 540 | } |
413 | pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31)); | ||
414 | return count; | 541 | return count; |
415 | } | 542 | } |
416 | 543 | ||
417 | #define STORE_CACHE_DISABLE(index) \ | 544 | #define STORE_CACHE_DISABLE(slot) \ |
418 | static ssize_t \ | 545 | static ssize_t \ |
419 | store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ | 546 | store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ |
420 | const char *buf, size_t count) \ | 547 | const char *buf, size_t count) \ |
421 | { \ | 548 | { \ |
422 | return store_cache_disable(this_leaf, buf, count, index); \ | 549 | return store_cache_disable(this_leaf, buf, count, slot); \ |
423 | } | 550 | } |
424 | STORE_CACHE_DISABLE(0) | 551 | STORE_CACHE_DISABLE(0) |
425 | STORE_CACHE_DISABLE(1) | 552 | STORE_CACHE_DISABLE(1) |
@@ -431,7 +558,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | |||
431 | 558 | ||
432 | #else /* CONFIG_CPU_SUP_AMD */ | 559 | #else /* CONFIG_CPU_SUP_AMD */ |
433 | static void __cpuinit | 560 | static void __cpuinit |
434 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | 561 | amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) |
435 | { | 562 | { |
436 | }; | 563 | }; |
437 | #endif /* CONFIG_CPU_SUP_AMD */ | 564 | #endif /* CONFIG_CPU_SUP_AMD */ |
@@ -447,8 +574,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index, | |||
447 | 574 | ||
448 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | 575 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { |
449 | amd_cpuid4(index, &eax, &ebx, &ecx); | 576 | amd_cpuid4(index, &eax, &ebx, &ecx); |
450 | if (boot_cpu_data.x86 >= 0x10) | 577 | amd_check_l3_disable(this_leaf, index); |
451 | amd_check_l3_disable(index, this_leaf); | ||
452 | } else { | 578 | } else { |
453 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | 579 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); |
454 | } | 580 | } |
@@ -722,6 +848,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) | |||
722 | for (i = 0; i < num_cache_leaves; i++) | 848 | for (i = 0; i < num_cache_leaves; i++) |
723 | cache_remove_shared_cpu_map(cpu, i); | 849 | cache_remove_shared_cpu_map(cpu, i); |
724 | 850 | ||
851 | kfree(per_cpu(ici_cpuid4_info, cpu)->l3); | ||
725 | kfree(per_cpu(ici_cpuid4_info, cpu)); | 852 | kfree(per_cpu(ici_cpuid4_info, cpu)); |
726 | per_cpu(ici_cpuid4_info, cpu) = NULL; | 853 | per_cpu(ici_cpuid4_info, cpu) = NULL; |
727 | } | 854 | } |
@@ -1006,7 +1133,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
1006 | 1133 | ||
1007 | this_leaf = CPUID4_INFO_IDX(cpu, i); | 1134 | this_leaf = CPUID4_INFO_IDX(cpu, i); |
1008 | 1135 | ||
1009 | if (this_leaf->can_disable) | 1136 | if (this_leaf->l3 && this_leaf->l3->can_disable) |
1010 | ktype_cache.default_attrs = default_l3_attrs; | 1137 | ktype_cache.default_attrs = default_l3_attrs; |
1011 | else | 1138 | else |
1012 | ktype_cache.default_attrs = default_attrs; | 1139 | ktype_cache.default_attrs = default_attrs; |
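The sysfs cache_disable_{0,1} files above encode their state in a per-slot register: bits 31:30 mark the slot as occupied and the low 12 bits hold the disabled L3 index, which is what amd_get_l3_disable_slot() decodes. A standalone sketch of that decoding (the sample register value is invented):

#include <stdint.h>
#include <stdio.h>

/* Decode a disable-slot register the way amd_get_l3_disable_slot() does:
 * a non-zero value in bits 31:30 means the slot is occupied and the low
 * 12 bits carry the disabled cache index. */
static int l3_disable_slot_index(uint32_t reg)
{
	if (reg & (3u << 30))
		return reg & 0xfff;
	return -1;		/* slot is free */
}

int main(void)
{
	uint32_t reg = (1u << 30) | 0x2a;	/* hypothetical register value */
	int idx = l3_disable_slot_index(reg);

	if (idx >= 0)
		printf("slot occupied, disabled index %d\n", idx);
	else
		printf("FREE\n");
	return 0;
}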
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 4ac6d48fe11b..bb34b03af252 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | |||
7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o | 7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o |
8 | 8 | ||
9 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o | 9 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o |
10 | |||
11 | obj-$(CONFIG_ACPI_APEI) += mce-apei.o | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c new file mode 100644 index 000000000000..8209472b27a5 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
@@ -0,0 +1,138 @@ | |||
1 | /* | ||
2 | * Bridge between MCE and APEI | ||
3 | * | ||
4 | * On some machines, corrected memory errors are reported via an APEI | ||
5 | * generic hardware error source (GHES) instead of a corrected Machine | ||
6 | * Check. These corrected memory errors can be reported to user space | ||
7 | * through /dev/mcelog by faking a corrected Machine Check, so that | ||
8 | * the faulty memory page can be offlined by /sbin/mcelog if the error | ||
9 | * count for that page exceeds the threshold. | ||
10 | * | ||
11 | * For a fatal MCE, the record is saved into persistent storage via | ||
12 | * ERST, so that it can be retrieved and logged again after reboot. | ||
13 | * | ||
14 | * Copyright 2010 Intel Corp. | ||
15 | * Author: Huang Ying <ying.huang@intel.com> | ||
16 | * | ||
17 | * This program is free software; you can redistribute it and/or | ||
18 | * modify it under the terms of the GNU General Public License version | ||
19 | * 2 as published by the Free Software Foundation. | ||
20 | * | ||
21 | * This program is distributed in the hope that it will be useful, | ||
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
24 | * GNU General Public License for more details. | ||
25 | * | ||
26 | * You should have received a copy of the GNU General Public License | ||
27 | * along with this program; if not, write to the Free Software | ||
28 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
29 | */ | ||
30 | |||
31 | #include <linux/kernel.h> | ||
32 | #include <linux/acpi.h> | ||
33 | #include <linux/cper.h> | ||
34 | #include <acpi/apei.h> | ||
35 | #include <asm/mce.h> | ||
36 | |||
37 | #include "mce-internal.h" | ||
38 | |||
39 | void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) | ||
40 | { | ||
41 | struct mce m; | ||
42 | |||
43 | /* Only corrected MC is reported */ | ||
44 | if (!corrected) | ||
45 | return; | ||
46 | |||
47 | mce_setup(&m); | ||
48 | m.bank = 1; | ||
49 | /* Fake a memory read corrected error with unknown channel */ | ||
50 | m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; | ||
51 | m.addr = mem_err->physical_addr; | ||
52 | mce_log(&m); | ||
53 | mce_notify_irq(); | ||
54 | } | ||
55 | EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); | ||
56 | |||
57 | #define CPER_CREATOR_MCE \ | ||
58 | UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ | ||
59 | 0x64, 0x90, 0xb8, 0x9d) | ||
60 | #define CPER_SECTION_TYPE_MCE \ | ||
61 | UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ | ||
62 | 0x04, 0x4a, 0x38, 0xfc) | ||
63 | |||
64 | /* | ||
65 | * The CPER specification (UEFI specification 2.3, appendix N) requires | ||
66 | * the record to be byte-packed. | ||
67 | */ | ||
68 | struct cper_mce_record { | ||
69 | struct cper_record_header hdr; | ||
70 | struct cper_section_descriptor sec_hdr; | ||
71 | struct mce mce; | ||
72 | } __packed; | ||
73 | |||
74 | int apei_write_mce(struct mce *m) | ||
75 | { | ||
76 | struct cper_mce_record rcd; | ||
77 | |||
78 | memset(&rcd, 0, sizeof(rcd)); | ||
79 | memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); | ||
80 | rcd.hdr.revision = CPER_RECORD_REV; | ||
81 | rcd.hdr.signature_end = CPER_SIG_END; | ||
82 | rcd.hdr.section_count = 1; | ||
83 | rcd.hdr.error_severity = CPER_SEV_FATAL; | ||
84 | /* timestamp, platform_id, partition_id are all invalid */ | ||
85 | rcd.hdr.validation_bits = 0; | ||
86 | rcd.hdr.record_length = sizeof(rcd); | ||
87 | rcd.hdr.creator_id = CPER_CREATOR_MCE; | ||
88 | rcd.hdr.notification_type = CPER_NOTIFY_MCE; | ||
89 | rcd.hdr.record_id = cper_next_record_id(); | ||
90 | rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR; | ||
91 | |||
92 | rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd; | ||
93 | rcd.sec_hdr.section_length = sizeof(rcd.mce); | ||
94 | rcd.sec_hdr.revision = CPER_SEC_REV; | ||
95 | /* fru_id and fru_text are invalid */ | ||
96 | rcd.sec_hdr.validation_bits = 0; | ||
97 | rcd.sec_hdr.flags = CPER_SEC_PRIMARY; | ||
98 | rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE; | ||
99 | rcd.sec_hdr.section_severity = CPER_SEV_FATAL; | ||
100 | |||
101 | memcpy(&rcd.mce, m, sizeof(*m)); | ||
102 | |||
103 | return erst_write(&rcd.hdr); | ||
104 | } | ||
105 | |||
106 | ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
107 | { | ||
108 | struct cper_mce_record rcd; | ||
109 | ssize_t len; | ||
110 | |||
111 | len = erst_read_next(&rcd.hdr, sizeof(rcd)); | ||
112 | if (len <= 0) | ||
113 | return len; | ||
114 | /* Cannot skip other records in ERST storage unless we clear them */ | ||
115 | else if (len != sizeof(rcd) || | ||
116 | uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { | ||
117 | if (printk_ratelimit()) | ||
118 | pr_warning( | ||
119 | "MCE-APEI: Can not skip the unknown record in ERST"); | ||
120 | return -EIO; | ||
121 | } | ||
122 | |||
123 | memcpy(m, &rcd.mce, sizeof(*m)); | ||
124 | *record_id = rcd.hdr.record_id; | ||
125 | |||
126 | return sizeof(*m); | ||
127 | } | ||
128 | |||
129 | /* Check whether there is record in ERST */ | ||
130 | int apei_check_mce(void) | ||
131 | { | ||
132 | return erst_get_record_count(); | ||
133 | } | ||
134 | |||
135 | int apei_clear_mce(u64 record_id) | ||
136 | { | ||
137 | return erst_clear(record_id); | ||
138 | } | ||
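apei_write_mce() lays the record out as a CPER header, one section descriptor, and the raw struct mce, with section_offset computed as the distance of the mce member from the start of the packed record. The sketch below mirrors only that offset arithmetic; the three placeholder structs use invented sizes and are not the real CPER definitions.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder structs with made-up sizes; only the layout idea of
 * struct cper_mce_record (header, section descriptor, MCE payload)
 * is illustrated here. */
struct fake_record_header      { uint8_t bytes[128]; };
struct fake_section_descriptor { uint8_t bytes[72];  };
struct fake_mce                { uint8_t bytes[88];  };

struct fake_cper_mce_record {
	struct fake_record_header      hdr;
	struct fake_section_descriptor sec_hdr;
	struct fake_mce                mce;
} __attribute__((packed));

int main(void)
{
	/* Equivalent of (void *)&rcd.mce - (void *)&rcd in apei_write_mce() */
	printf("section_offset = %zu, section_length = %zu\n",
	       offsetof(struct fake_cper_mce_record, mce),
	       sizeof(struct fake_mce));
	return 0;
}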
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 32996f9fab67..fefcc69ee8b5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -28,3 +28,26 @@ extern int mce_ser; | |||
28 | 28 | ||
29 | extern struct mce_bank *mce_banks; | 29 | extern struct mce_bank *mce_banks; |
30 | 30 | ||
31 | #ifdef CONFIG_ACPI_APEI | ||
32 | int apei_write_mce(struct mce *m); | ||
33 | ssize_t apei_read_mce(struct mce *m, u64 *record_id); | ||
34 | int apei_check_mce(void); | ||
35 | int apei_clear_mce(u64 record_id); | ||
36 | #else | ||
37 | static inline int apei_write_mce(struct mce *m) | ||
38 | { | ||
39 | return -EINVAL; | ||
40 | } | ||
41 | static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
42 | { | ||
43 | return 0; | ||
44 | } | ||
45 | static inline int apei_check_mce(void) | ||
46 | { | ||
47 | return 0; | ||
48 | } | ||
49 | static inline int apei_clear_mce(u64 record_id) | ||
50 | { | ||
51 | return -EINVAL; | ||
52 | } | ||
53 | #endif | ||
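The header pairs each APEI entry point with an inline stub for kernels built without CONFIG_ACPI_APEI, so mce.c can call them unconditionally. A tiny sketch of the same compile-time fallback pattern; HAVE_APEI is a made-up stand-in for the Kconfig symbol.

#include <stdio.h>

/* Define HAVE_APEI at build time to get the "real" implementation,
 * otherwise the stub below is used instead. */
#ifdef HAVE_APEI
static int apei_check_mce(void) { return 1; }	/* pretend one record pending */
#else
static inline int apei_check_mce(void) { return 0; }	/* stub: nothing stored */
#endif

int main(void)
{
	/* The caller never needs an #ifdef of its own. */
	printf("pending APEI MCE records: %d\n", apei_check_mce());
	return 0;
}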
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8a6f0afa767e..ed41562909fe 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/fs.h> | 36 | #include <linux/fs.h> |
37 | #include <linux/mm.h> | 37 | #include <linux/mm.h> |
38 | #include <linux/debugfs.h> | 38 | #include <linux/debugfs.h> |
39 | #include <linux/edac_mce.h> | ||
39 | 40 | ||
40 | #include <asm/processor.h> | 41 | #include <asm/processor.h> |
41 | #include <asm/hw_irq.h> | 42 | #include <asm/hw_irq.h> |
@@ -50,7 +51,7 @@ | |||
50 | static DEFINE_MUTEX(mce_read_mutex); | 51 | static DEFINE_MUTEX(mce_read_mutex); |
51 | 52 | ||
52 | #define rcu_dereference_check_mce(p) \ | 53 | #define rcu_dereference_check_mce(p) \ |
53 | rcu_dereference_check((p), \ | 54 | rcu_dereference_index_check((p), \ |
54 | rcu_read_lock_sched_held() || \ | 55 | rcu_read_lock_sched_held() || \ |
55 | lockdep_is_held(&mce_read_mutex)) | 56 | lockdep_is_held(&mce_read_mutex)) |
56 | 57 | ||
@@ -106,8 +107,8 @@ EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | |||
106 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | 107 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, |
107 | void *data) | 108 | void *data) |
108 | { | 109 | { |
109 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | 110 | pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n"); |
110 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | 111 | pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n"); |
111 | 112 | ||
112 | return NOTIFY_STOP; | 113 | return NOTIFY_STOP; |
113 | } | 114 | } |
@@ -169,6 +170,15 @@ void mce_log(struct mce *mce) | |||
169 | entry = rcu_dereference_check_mce(mcelog.next); | 170 | entry = rcu_dereference_check_mce(mcelog.next); |
170 | for (;;) { | 171 | for (;;) { |
171 | /* | 172 | /* |
173 | * If edac_mce is enabled, it will check the error type | ||
174 | * and process it if it is a known error. | ||
175 | * Otherwise, the error will be sent through the mcelog | ||
176 | * interface. | ||
177 | */ | ||
178 | if (edac_mce_parse(mce)) | ||
179 | return; | ||
180 | |||
181 | /* | ||
172 | * When the buffer fills up discard new entries. | 182 | * When the buffer fills up discard new entries. |
173 | * Assume that the earlier errors are the more | 183 | * Assume that the earlier errors are the more |
174 | * interesting ones: | 184 | * interesting ones: |
@@ -201,11 +211,11 @@ void mce_log(struct mce *mce) | |||
201 | 211 | ||
202 | static void print_mce(struct mce *m) | 212 | static void print_mce(struct mce *m) |
203 | { | 213 | { |
204 | pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | 214 | pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", |
205 | m->extcpu, m->mcgstatus, m->bank, m->status); | 215 | m->extcpu, m->mcgstatus, m->bank, m->status); |
206 | 216 | ||
207 | if (m->ip) { | 217 | if (m->ip) { |
208 | pr_emerg("RIP%s %02x:<%016Lx> ", | 218 | pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", |
209 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | 219 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", |
210 | m->cs, m->ip); | 220 | m->cs, m->ip); |
211 | 221 | ||
@@ -214,14 +224,14 @@ static void print_mce(struct mce *m) | |||
214 | pr_cont("\n"); | 224 | pr_cont("\n"); |
215 | } | 225 | } |
216 | 226 | ||
217 | pr_emerg("TSC %llx ", m->tsc); | 227 | pr_emerg(HW_ERR "TSC %llx ", m->tsc); |
218 | if (m->addr) | 228 | if (m->addr) |
219 | pr_cont("ADDR %llx ", m->addr); | 229 | pr_cont("ADDR %llx ", m->addr); |
220 | if (m->misc) | 230 | if (m->misc) |
221 | pr_cont("MISC %llx ", m->misc); | 231 | pr_cont("MISC %llx ", m->misc); |
222 | 232 | ||
223 | pr_cont("\n"); | 233 | pr_cont("\n"); |
224 | pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | 234 | pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", |
225 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); | 235 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); |
226 | 236 | ||
227 | /* | 237 | /* |
@@ -231,16 +241,6 @@ static void print_mce(struct mce *m) | |||
231 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | 241 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); |
232 | } | 242 | } |
233 | 243 | ||
234 | static void print_mce_head(void) | ||
235 | { | ||
236 | pr_emerg("\nHARDWARE ERROR\n"); | ||
237 | } | ||
238 | |||
239 | static void print_mce_tail(void) | ||
240 | { | ||
241 | pr_emerg("This is not a software problem!\n"); | ||
242 | } | ||
243 | |||
244 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | 244 | #define PANIC_TIMEOUT 5 /* 5 seconds */ |
245 | 245 | ||
246 | static atomic_t mce_paniced; | 246 | static atomic_t mce_paniced; |
@@ -264,7 +264,7 @@ static void wait_for_panic(void) | |||
264 | 264 | ||
265 | static void mce_panic(char *msg, struct mce *final, char *exp) | 265 | static void mce_panic(char *msg, struct mce *final, char *exp) |
266 | { | 266 | { |
267 | int i; | 267 | int i, apei_err = 0; |
268 | 268 | ||
269 | if (!fake_panic) { | 269 | if (!fake_panic) { |
270 | /* | 270 | /* |
@@ -281,14 +281,16 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
281 | if (atomic_inc_return(&mce_fake_paniced) > 1) | 281 | if (atomic_inc_return(&mce_fake_paniced) > 1) |
282 | return; | 282 | return; |
283 | } | 283 | } |
284 | print_mce_head(); | ||
285 | /* First print corrected ones that are still unlogged */ | 284 | /* First print corrected ones that are still unlogged */ |
286 | for (i = 0; i < MCE_LOG_LEN; i++) { | 285 | for (i = 0; i < MCE_LOG_LEN; i++) { |
287 | struct mce *m = &mcelog.entry[i]; | 286 | struct mce *m = &mcelog.entry[i]; |
288 | if (!(m->status & MCI_STATUS_VAL)) | 287 | if (!(m->status & MCI_STATUS_VAL)) |
289 | continue; | 288 | continue; |
290 | if (!(m->status & MCI_STATUS_UC)) | 289 | if (!(m->status & MCI_STATUS_UC)) { |
291 | print_mce(m); | 290 | print_mce(m); |
291 | if (!apei_err) | ||
292 | apei_err = apei_write_mce(m); | ||
293 | } | ||
292 | } | 294 | } |
293 | /* Now print uncorrected but with the final one last */ | 295 | /* Now print uncorrected but with the final one last */ |
294 | for (i = 0; i < MCE_LOG_LEN; i++) { | 296 | for (i = 0; i < MCE_LOG_LEN; i++) { |
@@ -297,22 +299,27 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
297 | continue; | 299 | continue; |
298 | if (!(m->status & MCI_STATUS_UC)) | 300 | if (!(m->status & MCI_STATUS_UC)) |
299 | continue; | 301 | continue; |
300 | if (!final || memcmp(m, final, sizeof(struct mce))) | 302 | if (!final || memcmp(m, final, sizeof(struct mce))) { |
301 | print_mce(m); | 303 | print_mce(m); |
304 | if (!apei_err) | ||
305 | apei_err = apei_write_mce(m); | ||
306 | } | ||
302 | } | 307 | } |
303 | if (final) | 308 | if (final) { |
304 | print_mce(final); | 309 | print_mce(final); |
310 | if (!apei_err) | ||
311 | apei_err = apei_write_mce(final); | ||
312 | } | ||
305 | if (cpu_missing) | 313 | if (cpu_missing) |
306 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | 314 | pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n"); |
307 | print_mce_tail(); | ||
308 | if (exp) | 315 | if (exp) |
309 | printk(KERN_EMERG "Machine check: %s\n", exp); | 316 | pr_emerg(HW_ERR "Machine check: %s\n", exp); |
310 | if (!fake_panic) { | 317 | if (!fake_panic) { |
311 | if (panic_timeout == 0) | 318 | if (panic_timeout == 0) |
312 | panic_timeout = mce_panic_timeout; | 319 | panic_timeout = mce_panic_timeout; |
313 | panic(msg); | 320 | panic(msg); |
314 | } else | 321 | } else |
315 | printk(KERN_EMERG "Fake kernel panic: %s\n", msg); | 322 | pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); |
316 | } | 323 | } |
317 | 324 | ||
318 | /* Support code for software error injection */ | 325 | /* Support code for software error injection */ |
@@ -539,7 +546,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
539 | struct mce m; | 546 | struct mce m; |
540 | int i; | 547 | int i; |
541 | 548 | ||
542 | __get_cpu_var(mce_poll_count)++; | 549 | percpu_inc(mce_poll_count); |
543 | 550 | ||
544 | mce_setup(&m); | 551 | mce_setup(&m); |
545 | 552 | ||
@@ -581,6 +588,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
581 | */ | 588 | */ |
582 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { | 589 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { |
583 | mce_log(&m); | 590 | mce_log(&m); |
591 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); | ||
584 | add_taint(TAINT_MACHINE_CHECK); | 592 | add_taint(TAINT_MACHINE_CHECK); |
585 | } | 593 | } |
586 | 594 | ||
@@ -934,7 +942,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
934 | 942 | ||
935 | atomic_inc(&mce_entry); | 943 | atomic_inc(&mce_entry); |
936 | 944 | ||
937 | __get_cpu_var(mce_exception_count)++; | 945 | percpu_inc(mce_exception_count); |
938 | 946 | ||
939 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | 947 | if (notify_die(DIE_NMI, "machine check", regs, error_code, |
940 | 18, SIGKILL) == NOTIFY_STOP) | 948 | 18, SIGKILL) == NOTIFY_STOP) |
@@ -1201,7 +1209,7 @@ int mce_notify_irq(void) | |||
1201 | schedule_work(&mce_trigger_work); | 1209 | schedule_work(&mce_trigger_work); |
1202 | 1210 | ||
1203 | if (__ratelimit(&ratelimit)) | 1211 | if (__ratelimit(&ratelimit)) |
1204 | printk(KERN_INFO "Machine check events logged\n"); | 1212 | pr_info(HW_ERR "Machine check events logged\n"); |
1205 | 1213 | ||
1206 | return 1; | 1214 | return 1; |
1207 | } | 1215 | } |
@@ -1493,6 +1501,43 @@ static void collect_tscs(void *data) | |||
1493 | rdtscll(cpu_tsc[smp_processor_id()]); | 1501 | rdtscll(cpu_tsc[smp_processor_id()]); |
1494 | } | 1502 | } |
1495 | 1503 | ||
1504 | static int mce_apei_read_done; | ||
1505 | |||
1506 | /* Collect MCE record of previous boot in persistent storage via APEI ERST. */ | ||
1507 | static int __mce_read_apei(char __user **ubuf, size_t usize) | ||
1508 | { | ||
1509 | int rc; | ||
1510 | u64 record_id; | ||
1511 | struct mce m; | ||
1512 | |||
1513 | if (usize < sizeof(struct mce)) | ||
1514 | return -EINVAL; | ||
1515 | |||
1516 | rc = apei_read_mce(&m, &record_id); | ||
1517 | /* Error or no more MCE record */ | ||
1518 | if (rc <= 0) { | ||
1519 | mce_apei_read_done = 1; | ||
1520 | return rc; | ||
1521 | } | ||
1522 | rc = -EFAULT; | ||
1523 | if (copy_to_user(*ubuf, &m, sizeof(struct mce))) | ||
1524 | return rc; | ||
1525 | /* | ||
1526 | * In fact, we should only clear the record after it has | ||
1527 | * been flushed to disk or sent over the network by | ||
1528 | * /sbin/mcelog, but there is no interface for that yet, | ||
1529 | * so just clear it now to avoid duplication. | ||
1530 | */ | ||
1531 | rc = apei_clear_mce(record_id); | ||
1532 | if (rc) { | ||
1533 | mce_apei_read_done = 1; | ||
1534 | return rc; | ||
1535 | } | ||
1536 | *ubuf += sizeof(struct mce); | ||
1537 | |||
1538 | return 0; | ||
1539 | } | ||
1540 | |||
1496 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | 1541 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, |
1497 | loff_t *off) | 1542 | loff_t *off) |
1498 | { | 1543 | { |
@@ -1506,15 +1551,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
1506 | return -ENOMEM; | 1551 | return -ENOMEM; |
1507 | 1552 | ||
1508 | mutex_lock(&mce_read_mutex); | 1553 | mutex_lock(&mce_read_mutex); |
1554 | |||
1555 | if (!mce_apei_read_done) { | ||
1556 | err = __mce_read_apei(&buf, usize); | ||
1557 | if (err || buf != ubuf) | ||
1558 | goto out; | ||
1559 | } | ||
1560 | |||
1509 | next = rcu_dereference_check_mce(mcelog.next); | 1561 | next = rcu_dereference_check_mce(mcelog.next); |
1510 | 1562 | ||
1511 | /* Only supports full reads right now */ | 1563 | /* Only supports full reads right now */ |
1512 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | 1564 | err = -EINVAL; |
1513 | mutex_unlock(&mce_read_mutex); | 1565 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) |
1514 | kfree(cpu_tsc); | 1566 | goto out; |
1515 | |||
1516 | return -EINVAL; | ||
1517 | } | ||
1518 | 1567 | ||
1519 | err = 0; | 1568 | err = 0; |
1520 | prev = 0; | 1569 | prev = 0; |
@@ -1562,10 +1611,15 @@ timeout: | |||
1562 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | 1611 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); |
1563 | } | 1612 | } |
1564 | } | 1613 | } |
1614 | |||
1615 | if (err) | ||
1616 | err = -EFAULT; | ||
1617 | |||
1618 | out: | ||
1565 | mutex_unlock(&mce_read_mutex); | 1619 | mutex_unlock(&mce_read_mutex); |
1566 | kfree(cpu_tsc); | 1620 | kfree(cpu_tsc); |
1567 | 1621 | ||
1568 | return err ? -EFAULT : buf - ubuf; | 1622 | return err ? err : buf - ubuf; |
1569 | } | 1623 | } |
1570 | 1624 | ||
1571 | static unsigned int mce_poll(struct file *file, poll_table *wait) | 1625 | static unsigned int mce_poll(struct file *file, poll_table *wait) |
@@ -1573,6 +1627,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) | |||
1573 | poll_wait(file, &mce_wait, wait); | 1627 | poll_wait(file, &mce_wait, wait); |
1574 | if (rcu_dereference_check_mce(mcelog.next)) | 1628 | if (rcu_dereference_check_mce(mcelog.next)) |
1575 | return POLLIN | POLLRDNORM; | 1629 | return POLLIN | POLLRDNORM; |
1630 | if (!mce_apei_read_done && apei_check_mce()) | ||
1631 | return POLLIN | POLLRDNORM; | ||
1576 | return 0; | 1632 | return 0; |
1577 | } | 1633 | } |
1578 | 1634 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 224392d8fe8c..39aaee5c1ab2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -141,6 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
141 | address = (low & MASK_BLKPTR_LO) >> 21; | 141 | address = (low & MASK_BLKPTR_LO) >> 21; |
142 | if (!address) | 142 | if (!address) |
143 | break; | 143 | break; |
144 | |||
144 | address += MCG_XBLK_ADDR; | 145 | address += MCG_XBLK_ADDR; |
145 | } else | 146 | } else |
146 | ++address; | 147 | ++address; |
@@ -148,12 +149,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
148 | if (rdmsr_safe(address, &low, &high)) | 149 | if (rdmsr_safe(address, &low, &high)) |
149 | break; | 150 | break; |
150 | 151 | ||
151 | if (!(high & MASK_VALID_HI)) { | 152 | if (!(high & MASK_VALID_HI)) |
152 | if (block) | 153 | continue; |
153 | continue; | ||
154 | else | ||
155 | break; | ||
156 | } | ||
157 | 154 | ||
158 | if (!(high & MASK_CNTP_HI) || | 155 | if (!(high & MASK_CNTP_HI) || |
159 | (high & MASK_LOCKED_HI)) | 156 | (high & MASK_LOCKED_HI)) |
@@ -530,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
530 | err = -ENOMEM; | 527 | err = -ENOMEM; |
531 | goto out; | 528 | goto out; |
532 | } | 529 | } |
533 | if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { | 530 | if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) { |
534 | kfree(b); | 531 | kfree(b); |
535 | err = -ENOMEM; | 532 | err = -ENOMEM; |
536 | goto out; | 533 | goto out; |
@@ -543,7 +540,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
543 | #ifndef CONFIG_SMP | 540 | #ifndef CONFIG_SMP |
544 | cpumask_setall(b->cpus); | 541 | cpumask_setall(b->cpus); |
545 | #else | 542 | #else |
546 | cpumask_copy(b->cpus, c->llc_shared_map); | 543 | cpumask_set_cpu(cpu, b->cpus); |
547 | #endif | 544 | #endif |
548 | 545 | ||
549 | per_cpu(threshold_banks, cpu)[bank] = b; | 546 | per_cpu(threshold_banks, cpu)[bank] = b; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 62b48e40920a..6fcd0936194f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -95,19 +95,20 @@ static void cmci_discover(int banks, int boot) | |||
95 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 95 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
96 | 96 | ||
97 | /* Already owned by someone else? */ | 97 | /* Already owned by someone else? */ |
98 | if (val & CMCI_EN) { | 98 | if (val & MCI_CTL2_CMCI_EN) { |
99 | if (test_and_clear_bit(i, owned) && !boot) | 99 | if (test_and_clear_bit(i, owned) && !boot) |
100 | print_update("SHD", &hdr, i); | 100 | print_update("SHD", &hdr, i); |
101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
102 | continue; | 102 | continue; |
103 | } | 103 | } |
104 | 104 | ||
105 | val |= CMCI_EN | CMCI_THRESHOLD; | 105 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; |
106 | val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; | ||
106 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 107 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
107 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 108 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
108 | 109 | ||
109 | /* Did the enable bit stick? -- the bank supports CMCI */ | 110 | /* Did the enable bit stick? -- the bank supports CMCI */ |
110 | if (val & CMCI_EN) { | 111 | if (val & MCI_CTL2_CMCI_EN) { |
111 | if (!test_and_set_bit(i, owned) && !boot) | 112 | if (!test_and_set_bit(i, owned) && !boot) |
112 | print_update("CMCI", &hdr, i); | 113 | print_update("CMCI", &hdr, i); |
113 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 114 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
@@ -155,7 +156,7 @@ void cmci_clear(void) | |||
155 | continue; | 156 | continue; |
156 | /* Disable CMCI */ | 157 | /* Disable CMCI */ |
157 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 158 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
158 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | 159 | val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); |
159 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 160 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
160 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 161 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
161 | } | 162 | } |
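cmci_discover() claims a bank by setting MCI_CTL2_CMCI_EN, reading the MSR back, and keeping the bank only if the enable bit stuck; banks whose bit does not stick stay on the polling path. A self-contained sketch of that write-and-read-back probe, with a fake MSR modelling a bank that does latch the bit:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CMCI_EN_BIT (1ull << 30)	/* stand-in for MCI_CTL2_CMCI_EN */

/* Fake CTL2 MSR for a bank that latches the enable bit when written. */
static uint64_t fake_ctl2;

static void     wrmsr_sim(uint64_t v) { fake_ctl2 = v; }
static uint64_t rdmsr_sim(void)       { return fake_ctl2; }

int main(void)
{
	uint64_t val = rdmsr_sim();

	/* Try to enable CMCI on this bank... */
	wrmsr_sim(val | CMCI_EN_BIT);
	/* ...and check whether the enable bit stuck. */
	val = rdmsr_sim();

	bool owned = val & CMCI_EN_BIT;
	printf("bank %s CMCI\n", owned ? "supports" : "does not support");
	return 0;
}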
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 81c499eceb21..169d8804a9f8 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -34,15 +34,25 @@ | |||
34 | /* How long to wait between reporting thermal events */ | 34 | /* How long to wait between reporting thermal events */ |
35 | #define CHECK_INTERVAL (300 * HZ) | 35 | #define CHECK_INTERVAL (300 * HZ) |
36 | 36 | ||
37 | #define THERMAL_THROTTLING_EVENT 0 | ||
38 | #define POWER_LIMIT_EVENT 1 | ||
39 | |||
37 | /* | 40 | /* |
38 | * Current thermal throttling state: | 41 | * Current thermal event state: |
39 | */ | 42 | */ |
40 | struct thermal_state { | 43 | struct _thermal_state { |
41 | bool is_throttled; | 44 | bool new_event; |
42 | 45 | int event; | |
43 | u64 next_check; | 46 | u64 next_check; |
44 | unsigned long throttle_count; | 47 | unsigned long count; |
45 | unsigned long last_throttle_count; | 48 | unsigned long last_count; |
49 | }; | ||
50 | |||
51 | struct thermal_state { | ||
52 | struct _thermal_state core_throttle; | ||
53 | struct _thermal_state core_power_limit; | ||
54 | struct _thermal_state package_throttle; | ||
55 | struct _thermal_state package_power_limit; | ||
46 | }; | 56 | }; |
47 | 57 | ||
48 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); | 58 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); |
@@ -53,11 +63,13 @@ static u32 lvtthmr_init __read_mostly; | |||
53 | 63 | ||
54 | #ifdef CONFIG_SYSFS | 64 | #ifdef CONFIG_SYSFS |
55 | #define define_therm_throt_sysdev_one_ro(_name) \ | 65 | #define define_therm_throt_sysdev_one_ro(_name) \ |
56 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 66 | static SYSDEV_ATTR(_name, 0444, \ |
67 | therm_throt_sysdev_show_##_name, \ | ||
68 | NULL) \ | ||
57 | 69 | ||
58 | #define define_therm_throt_sysdev_show_func(name) \ | 70 | #define define_therm_throt_sysdev_show_func(event, name) \ |
59 | \ | 71 | \ |
60 | static ssize_t therm_throt_sysdev_show_##name( \ | 72 | static ssize_t therm_throt_sysdev_show_##event##_##name( \ |
61 | struct sys_device *dev, \ | 73 | struct sys_device *dev, \ |
62 | struct sysdev_attribute *attr, \ | 74 | struct sysdev_attribute *attr, \ |
63 | char *buf) \ | 75 | char *buf) \ |
@@ -66,30 +78,42 @@ static ssize_t therm_throt_sysdev_show_##name( \ | |||
66 | ssize_t ret; \ | 78 | ssize_t ret; \ |
67 | \ | 79 | \ |
68 | preempt_disable(); /* CPU hotplug */ \ | 80 | preempt_disable(); /* CPU hotplug */ \ |
69 | if (cpu_online(cpu)) \ | 81 | if (cpu_online(cpu)) { \ |
70 | ret = sprintf(buf, "%lu\n", \ | 82 | ret = sprintf(buf, "%lu\n", \ |
71 | per_cpu(thermal_state, cpu).name); \ | 83 | per_cpu(thermal_state, cpu).event.name); \ |
72 | else \ | 84 | } else \ |
73 | ret = 0; \ | 85 | ret = 0; \ |
74 | preempt_enable(); \ | 86 | preempt_enable(); \ |
75 | \ | 87 | \ |
76 | return ret; \ | 88 | return ret; \ |
77 | } | 89 | } |
78 | 90 | ||
79 | define_therm_throt_sysdev_show_func(throttle_count); | 91 | define_therm_throt_sysdev_show_func(core_throttle, count); |
80 | define_therm_throt_sysdev_one_ro(throttle_count); | 92 | define_therm_throt_sysdev_one_ro(core_throttle_count); |
93 | |||
94 | define_therm_throt_sysdev_show_func(core_power_limit, count); | ||
95 | define_therm_throt_sysdev_one_ro(core_power_limit_count); | ||
96 | |||
97 | define_therm_throt_sysdev_show_func(package_throttle, count); | ||
98 | define_therm_throt_sysdev_one_ro(package_throttle_count); | ||
99 | |||
100 | define_therm_throt_sysdev_show_func(package_power_limit, count); | ||
101 | define_therm_throt_sysdev_one_ro(package_power_limit_count); | ||
81 | 102 | ||
82 | static struct attribute *thermal_throttle_attrs[] = { | 103 | static struct attribute *thermal_throttle_attrs[] = { |
83 | &attr_throttle_count.attr, | 104 | &attr_core_throttle_count.attr, |
84 | NULL | 105 | NULL |
85 | }; | 106 | }; |
86 | 107 | ||
87 | static struct attribute_group thermal_throttle_attr_group = { | 108 | static struct attribute_group thermal_attr_group = { |
88 | .attrs = thermal_throttle_attrs, | 109 | .attrs = thermal_throttle_attrs, |
89 | .name = "thermal_throttle" | 110 | .name = "thermal_throttle" |
90 | }; | 111 | }; |
91 | #endif /* CONFIG_SYSFS */ | 112 | #endif /* CONFIG_SYSFS */ |
92 | 113 | ||
114 | #define CORE_LEVEL 0 | ||
115 | #define PACKAGE_LEVEL 1 | ||
116 | |||
93 | /*** | 117 | /*** |
94 | * therm_throt_process - Process thermal throttling event from interrupt | 118 | * therm_throt_process - Process thermal throttling event from interrupt |
95 | * @curr: Whether the condition is current or not (boolean), since the | 119 | * @curr: Whether the condition is current or not (boolean), since the |
@@ -106,39 +130,70 @@ static struct attribute_group thermal_throttle_attr_group = { | |||
106 | * 1 : Event should be logged further, and a message has been | 130 | * 1 : Event should be logged further, and a message has been |
107 | * printed to the syslog. | 131 | * printed to the syslog. |
108 | */ | 132 | */ |
109 | static int therm_throt_process(bool is_throttled) | 133 | static int therm_throt_process(bool new_event, int event, int level) |
110 | { | 134 | { |
111 | struct thermal_state *state; | 135 | struct _thermal_state *state; |
112 | unsigned int this_cpu; | 136 | unsigned int this_cpu = smp_processor_id(); |
113 | bool was_throttled; | 137 | bool old_event; |
114 | u64 now; | 138 | u64 now; |
139 | struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); | ||
115 | 140 | ||
116 | this_cpu = smp_processor_id(); | ||
117 | now = get_jiffies_64(); | 141 | now = get_jiffies_64(); |
118 | state = &per_cpu(thermal_state, this_cpu); | 142 | if (level == CORE_LEVEL) { |
143 | if (event == THERMAL_THROTTLING_EVENT) | ||
144 | state = &pstate->core_throttle; | ||
145 | else if (event == POWER_LIMIT_EVENT) | ||
146 | state = &pstate->core_power_limit; | ||
147 | else | ||
148 | return 0; | ||
149 | } else if (level == PACKAGE_LEVEL) { | ||
150 | if (event == THERMAL_THROTTLING_EVENT) | ||
151 | state = &pstate->package_throttle; | ||
152 | else if (event == POWER_LIMIT_EVENT) | ||
153 | state = &pstate->package_power_limit; | ||
154 | else | ||
155 | return 0; | ||
156 | } else | ||
157 | return 0; | ||
119 | 158 | ||
120 | was_throttled = state->is_throttled; | 159 | old_event = state->new_event; |
121 | state->is_throttled = is_throttled; | 160 | state->new_event = new_event; |
122 | 161 | ||
123 | if (is_throttled) | 162 | if (new_event) |
124 | state->throttle_count++; | 163 | state->count++; |
125 | 164 | ||
126 | if (time_before64(now, state->next_check) && | 165 | if (time_before64(now, state->next_check) && |
127 | state->throttle_count != state->last_throttle_count) | 166 | state->count != state->last_count) |
128 | return 0; | 167 | return 0; |
129 | 168 | ||
130 | state->next_check = now + CHECK_INTERVAL; | 169 | state->next_check = now + CHECK_INTERVAL; |
131 | state->last_throttle_count = state->throttle_count; | 170 | state->last_count = state->count; |
132 | 171 | ||
133 | /* if we just entered the thermal event */ | 172 | /* if we just entered the thermal event */ |
134 | if (is_throttled) { | 173 | if (new_event) { |
135 | printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count); | 174 | if (event == THERMAL_THROTTLING_EVENT) |
175 | printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", | ||
176 | this_cpu, | ||
177 | level == CORE_LEVEL ? "Core" : "Package", | ||
178 | state->count); | ||
179 | else | ||
180 | printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n", | ||
181 | this_cpu, | ||
182 | level == CORE_LEVEL ? "Core" : "Package", | ||
183 | state->count); | ||
136 | 184 | ||
137 | add_taint(TAINT_MACHINE_CHECK); | 185 | add_taint(TAINT_MACHINE_CHECK); |
138 | return 1; | 186 | return 1; |
139 | } | 187 | } |
140 | if (was_throttled) { | 188 | if (old_event) { |
141 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu); | 189 | if (event == THERMAL_THROTTLING_EVENT) |
190 | printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", | ||
191 | this_cpu, | ||
192 | level == CORE_LEVEL ? "Core" : "Package"); | ||
193 | else | ||
194 | printk(KERN_INFO "CPU%d: %s power limit normal\n", | ||
195 | this_cpu, | ||
196 | level == CORE_LEVEL ? "Core" : "Package"); | ||
142 | return 1; | 197 | return 1; |
143 | } | 198 | } |
144 | 199 | ||
@@ -147,15 +202,36 @@ static int therm_throt_process(bool is_throttled) | |||
147 | 202 | ||
148 | #ifdef CONFIG_SYSFS | 203 | #ifdef CONFIG_SYSFS |
149 | /* Add/Remove thermal_throttle interface for CPU device: */ | 204 | /* Add/Remove thermal_throttle interface for CPU device: */ |
150 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) | 205 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, |
206 | unsigned int cpu) | ||
151 | { | 207 | { |
152 | return sysfs_create_group(&sys_dev->kobj, | 208 | int err; |
153 | &thermal_throttle_attr_group); | 209 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
210 | |||
211 | err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); | ||
212 | if (err) | ||
213 | return err; | ||
214 | |||
215 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
216 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
217 | &attr_core_power_limit_count.attr, | ||
218 | thermal_attr_group.name); | ||
219 | if (cpu_has(c, X86_FEATURE_PTS)) { | ||
220 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
221 | &attr_package_throttle_count.attr, | ||
222 | thermal_attr_group.name); | ||
223 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
224 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
225 | &attr_package_power_limit_count.attr, | ||
226 | thermal_attr_group.name); | ||
227 | } | ||
228 | |||
229 | return err; | ||
154 | } | 230 | } |
155 | 231 | ||
156 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | 232 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) |
157 | { | 233 | { |
158 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); | 234 | sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group); |
159 | } | 235 | } |
160 | 236 | ||
161 | /* Mutex protecting device creation against CPU hotplug: */ | 237 | /* Mutex protecting device creation against CPU hotplug: */ |
@@ -177,7 +253,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
177 | case CPU_UP_PREPARE: | 253 | case CPU_UP_PREPARE: |
178 | case CPU_UP_PREPARE_FROZEN: | 254 | case CPU_UP_PREPARE_FROZEN: |
179 | mutex_lock(&therm_cpu_lock); | 255 | mutex_lock(&therm_cpu_lock); |
180 | err = thermal_throttle_add_dev(sys_dev); | 256 | err = thermal_throttle_add_dev(sys_dev, cpu); |
181 | mutex_unlock(&therm_cpu_lock); | 257 | mutex_unlock(&therm_cpu_lock); |
182 | WARN_ON(err); | 258 | WARN_ON(err); |
183 | break; | 259 | break; |
@@ -190,7 +266,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
190 | mutex_unlock(&therm_cpu_lock); | 266 | mutex_unlock(&therm_cpu_lock); |
191 | break; | 267 | break; |
192 | } | 268 | } |
193 | return err ? NOTIFY_BAD : NOTIFY_OK; | 269 | return notifier_from_errno(err); |
194 | } | 270 | } |
195 | 271 | ||
196 | static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = | 272 | static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = |
@@ -213,7 +289,7 @@ static __init int thermal_throttle_init_device(void) | |||
213 | #endif | 289 | #endif |
214 | /* connect live CPUs to sysfs */ | 290 | /* connect live CPUs to sysfs */ |
215 | for_each_online_cpu(cpu) { | 291 | for_each_online_cpu(cpu) { |
216 | err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); | 292 | err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu); |
217 | WARN_ON(err); | 293 | WARN_ON(err); |
218 | } | 294 | } |
219 | #ifdef CONFIG_HOTPLUG_CPU | 295 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -226,14 +302,50 @@ device_initcall(thermal_throttle_init_device); | |||
226 | 302 | ||
227 | #endif /* CONFIG_SYSFS */ | 303 | #endif /* CONFIG_SYSFS */ |
228 | 304 | ||
305 | /* | ||
306 | * Set up the two most significant bits to tell the mce log which thermal | ||
307 | * event type this is. | ||
308 | * This is a temporary solution and may be changed in the future with the | ||
309 | * mce log infrastructure. | ||
310 | */ | ||
311 | #define CORE_THROTTLED (0) | ||
312 | #define CORE_POWER_LIMIT ((__u64)1 << 62) | ||
313 | #define PACKAGE_THROTTLED ((__u64)2 << 62) | ||
314 | #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) | ||
315 | |||
229 | /* Thermal transition interrupt handler */ | 316 | /* Thermal transition interrupt handler */ |
230 | static void intel_thermal_interrupt(void) | 317 | static void intel_thermal_interrupt(void) |
231 | { | 318 | { |
232 | __u64 msr_val; | 319 | __u64 msr_val; |
320 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | ||
233 | 321 | ||
234 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 322 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
235 | if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0)) | 323 | |
236 | mce_log_therm_throt_event(msr_val); | 324 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, |
325 | THERMAL_THROTTLING_EVENT, | ||
326 | CORE_LEVEL) != 0) | ||
327 | mce_log_therm_throt_event(CORE_THROTTLED | msr_val); | ||
328 | |||
329 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
330 | if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, | ||
331 | POWER_LIMIT_EVENT, | ||
332 | CORE_LEVEL) != 0) | ||
333 | mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); | ||
334 | |||
335 | if (cpu_has(c, X86_FEATURE_PTS)) { | ||
336 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | ||
337 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, | ||
338 | THERMAL_THROTTLING_EVENT, | ||
339 | PACKAGE_LEVEL) != 0) | ||
340 | mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); | ||
341 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
342 | if (therm_throt_process(msr_val & | ||
343 | PACKAGE_THERM_STATUS_POWER_LIMIT, | ||
344 | POWER_LIMIT_EVENT, | ||
345 | PACKAGE_LEVEL) != 0) | ||
346 | mce_log_therm_throt_event(PACKAGE_POWER_LIMIT | ||
347 | | msr_val); | ||
348 | } | ||
237 | } | 349 | } |
238 | 350 | ||
239 | static void unexpected_thermal_interrupt(void) | 351 | static void unexpected_thermal_interrupt(void) |
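Editor's aside: since intel_thermal_interrupt() above tags the value passed to mce_log_therm_throt_event() with the event type in its two most significant bits, a log consumer can recover the type with a simple mask. A hypothetical stand-alone sketch (therm_event_name is not a kernel function):

#include <stdint.h>
#include <stdio.h>

#define THERM_EVENT_MASK ((uint64_t)3 << 62)

static const char *therm_event_name(uint64_t logged)
{
	switch (logged & THERM_EVENT_MASK) {
	case 0:                 return "core throttled";
	case (uint64_t)1 << 62: return "core power limit";
	case (uint64_t)2 << 62: return "package throttled";
	default:                return "package power limit";
	}
}

int main(void)
{
	uint64_t logged = ((uint64_t)2 << 62) | 0x1ULL; /* fabricated log entry */

	printf("%s, raw status bits 0x%llx\n", therm_event_name(logged),
	       (unsigned long long)(logged & ~THERM_EVENT_MASK));
	return 0;
}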
@@ -335,8 +447,26 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
335 | apic_write(APIC_LVTTHMR, h); | 447 | apic_write(APIC_LVTTHMR, h); |
336 | 448 | ||
337 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | 449 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); |
338 | wrmsr(MSR_IA32_THERM_INTERRUPT, | 450 | if (cpu_has(c, X86_FEATURE_PLN)) |
339 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | 451 | wrmsr(MSR_IA32_THERM_INTERRUPT, |
452 | l | (THERM_INT_LOW_ENABLE | ||
453 | | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h); | ||
454 | else | ||
455 | wrmsr(MSR_IA32_THERM_INTERRUPT, | ||
456 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | ||
457 | |||
458 | if (cpu_has(c, X86_FEATURE_PTS)) { | ||
459 | rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | ||
460 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
461 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
462 | l | (PACKAGE_THERM_INT_LOW_ENABLE | ||
463 | | PACKAGE_THERM_INT_HIGH_ENABLE | ||
464 | | PACKAGE_THERM_INT_PLN_ENABLE), h); | ||
465 | else | ||
466 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
467 | l | (PACKAGE_THERM_INT_LOW_ENABLE | ||
468 | | PACKAGE_THERM_INT_HIGH_ENABLE), h); | ||
469 | } | ||
340 | 470 | ||
341 | smp_thermal_vector = intel_thermal_interrupt; | 471 | smp_thermal_vector = intel_thermal_interrupt; |
342 | 472 | ||
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c new file mode 100644 index 000000000000..d944bf6c50e9 --- /dev/null +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
@@ -0,0 +1,56 @@ | |||
1 | /* | ||
2 | * HyperV Detection code. | ||
3 | * | ||
4 | * Copyright (C) 2010, Novell, Inc. | ||
5 | * Author : K. Y. Srinivasan <ksrinivasan@novell.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; version 2 of the License. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <asm/processor.h> | ||
16 | #include <asm/hypervisor.h> | ||
17 | #include <asm/hyperv.h> | ||
18 | #include <asm/mshyperv.h> | ||
19 | |||
20 | struct ms_hyperv_info ms_hyperv; | ||
21 | EXPORT_SYMBOL_GPL(ms_hyperv); | ||
22 | |||
23 | static bool __init ms_hyperv_platform(void) | ||
24 | { | ||
25 | u32 eax; | ||
26 | u32 hyp_signature[3]; | ||
27 | |||
28 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) | ||
29 | return false; | ||
30 | |||
31 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, | ||
32 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); | ||
33 | |||
34 | return eax >= HYPERV_CPUID_MIN && | ||
35 | eax <= HYPERV_CPUID_MAX && | ||
36 | !memcmp("Microsoft Hv", hyp_signature, 12); | ||
37 | } | ||
38 | |||
39 | static void __init ms_hyperv_init_platform(void) | ||
40 | { | ||
41 | /* | ||
42 | * Extract the features and hints | ||
43 | */ | ||
44 | ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); | ||
45 | ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); | ||
46 | |||
47 | printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", | ||
48 | ms_hyperv.features, ms_hyperv.hints); | ||
49 | } | ||
50 | |||
51 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | ||
52 | .name = "Microsoft HyperV", | ||
53 | .detect = ms_hyperv_platform, | ||
54 | .init_platform = ms_hyperv_init_platform, | ||
55 | }; | ||
56 | EXPORT_SYMBOL(x86_hyper_ms_hyperv); | ||
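Editor's aside: for illustration only, the CPUID handshake used by ms_hyperv_platform() can be exercised from user space with the compiler's <cpuid.h> helpers. This sketch is not part of the patch and omits the min/max-leaf range check:

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx, sig[3];

	/* CPUID.1:ECX bit 31 is the "running under a hypervisor" flag. */
	__cpuid(1, eax, ebx, ecx, edx);
	if (!(ecx & (1u << 31))) {
		puts("no hypervisor");
		return 0;
	}

	/* Leaf 0x40000000: max hypervisor leaf plus a 12-byte vendor signature. */
	__cpuid(0x40000000, eax, sig[0], sig[1], sig[2]);
	if (!memcmp("Microsoft Hv", sig, 12))
		printf("Hyper-V detected, max leaf 0x%x\n", eax);
	else
		puts("hypervisor present, but not Hyper-V");
	return 0;
}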
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 06130b52f012..c5f59d071425 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -632,9 +632,9 @@ static void __init mtrr_print_out_one_result(int i) | |||
632 | unsigned long gran_base, chunk_base, lose_base; | 632 | unsigned long gran_base, chunk_base, lose_base; |
633 | char gran_factor, chunk_factor, lose_factor; | 633 | char gran_factor, chunk_factor, lose_factor; |
634 | 634 | ||
635 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | 635 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor); |
636 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | 636 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor); |
637 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | 637 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor); |
638 | 638 | ||
639 | pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", | 639 | pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", |
640 | result[i].bad ? "*BAD*" : " ", | 640 | result[i].bad ? "*BAD*" : " ", |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index fd31a441c61c..7d28d7d03885 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -433,13 +433,12 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
433 | { | 433 | { |
434 | unsigned int mask_lo, mask_hi, base_lo, base_hi; | 434 | unsigned int mask_lo, mask_hi, base_lo, base_hi; |
435 | unsigned int tmp, hi; | 435 | unsigned int tmp, hi; |
436 | int cpu; | ||
437 | 436 | ||
438 | /* | 437 | /* |
439 | * get_mtrr doesn't need to update mtrr_state, also it could be called | 438 | * get_mtrr doesn't need to update mtrr_state, also it could be called |
440 | * from any cpu, so try to print it out directly. | 439 | * from any cpu, so try to print it out directly. |
441 | */ | 440 | */ |
442 | cpu = get_cpu(); | 441 | get_cpu(); |
443 | 442 | ||
444 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); | 443 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); |
445 | 444 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 79556bd9b602..01c0f3ee6cc3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -35,6 +35,7 @@ | |||
35 | 35 | ||
36 | #include <linux/types.h> /* FIXME: kvm_para.h needs this */ | 36 | #include <linux/types.h> /* FIXME: kvm_para.h needs this */ |
37 | 37 | ||
38 | #include <linux/stop_machine.h> | ||
38 | #include <linux/kvm_para.h> | 39 | #include <linux/kvm_para.h> |
39 | #include <linux/uaccess.h> | 40 | #include <linux/uaccess.h> |
40 | #include <linux/module.h> | 41 | #include <linux/module.h> |
@@ -143,22 +144,28 @@ struct set_mtrr_data { | |||
143 | mtrr_type smp_type; | 144 | mtrr_type smp_type; |
144 | }; | 145 | }; |
145 | 146 | ||
147 | static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work); | ||
148 | |||
146 | /** | 149 | /** |
147 | * ipi_handler - Synchronisation handler. Executed by "other" CPUs. | 150 | * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs. |
148 | * @info: pointer to mtrr configuration data | 151 | * @info: pointer to mtrr configuration data |
149 | * | 152 | * |
150 | * Returns nothing. | 153 | * Returns nothing. |
151 | */ | 154 | */ |
152 | static void ipi_handler(void *info) | 155 | static int mtrr_work_handler(void *info) |
153 | { | 156 | { |
154 | #ifdef CONFIG_SMP | 157 | #ifdef CONFIG_SMP |
155 | struct set_mtrr_data *data = info; | 158 | struct set_mtrr_data *data = info; |
156 | unsigned long flags; | 159 | unsigned long flags; |
157 | 160 | ||
161 | atomic_dec(&data->count); | ||
162 | while (!atomic_read(&data->gate)) | ||
163 | cpu_relax(); | ||
164 | |||
158 | local_irq_save(flags); | 165 | local_irq_save(flags); |
159 | 166 | ||
160 | atomic_dec(&data->count); | 167 | atomic_dec(&data->count); |
161 | while (!atomic_read(&data->gate)) | 168 | while (atomic_read(&data->gate)) |
162 | cpu_relax(); | 169 | cpu_relax(); |
163 | 170 | ||
164 | /* The master has cleared me to execute */ | 171 | /* The master has cleared me to execute */ |
@@ -173,12 +180,13 @@ static void ipi_handler(void *info) | |||
173 | } | 180 | } |
174 | 181 | ||
175 | atomic_dec(&data->count); | 182 | atomic_dec(&data->count); |
176 | while (atomic_read(&data->gate)) | 183 | while (!atomic_read(&data->gate)) |
177 | cpu_relax(); | 184 | cpu_relax(); |
178 | 185 | ||
179 | atomic_dec(&data->count); | 186 | atomic_dec(&data->count); |
180 | local_irq_restore(flags); | 187 | local_irq_restore(flags); |
181 | #endif | 188 | #endif |
189 | return 0; | ||
182 | } | 190 | } |
183 | 191 | ||
184 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) | 192 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) |
@@ -198,7 +206,7 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) | |||
198 | * | 206 | * |
199 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: | 207 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: |
200 | * | 208 | * |
201 | * 1. Send IPI to do the following: | 209 | * 1. Queue work to do the following on all processors: |
202 | * 2. Disable Interrupts | 210 | * 2. Disable Interrupts |
203 | * 3. Wait for all procs to do so | 211 | * 3. Wait for all procs to do so |
204 | * 4. Enter no-fill cache mode | 212 | * 4. Enter no-fill cache mode |
@@ -215,14 +223,17 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) | |||
215 | * 15. Enable interrupts. | 223 | * 15. Enable interrupts. |
216 | * | 224 | * |
217 | * What does that mean for us? Well, first we set data.count to the number | 225 | * What does that mean for us? Well, first we set data.count to the number |
218 | * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait | 226 | * of CPUs. As each CPU announces that it started the rendezvous handler by |
219 | * until it hits 0 and proceed. We set the data.gate flag and reset data.count. | 227 | * decrementing the count, we reset data.count and set the data.gate flag |
220 | * Meanwhile, they are waiting for that flag to be set. Once it's set, each | 228 | * allowing all the CPUs to proceed with the work. As each CPU disables |
229 | * interrupts, it'll decrement data.count once. We wait until it hits 0 and | ||
230 | * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they | ||
231 | * are waiting for that flag to be cleared. Once it's cleared, each | ||
221 | * CPU goes through the transition of updating MTRRs. | 232 | * CPU goes through the transition of updating MTRRs. |
222 | * The CPU vendors may each do it differently, | 233 | * The CPU vendors may each do it differently, |
223 | * so we call mtrr_if->set() callback and let them take care of it. | 234 | * so we call mtrr_if->set() callback and let them take care of it. |
224 | * When they're done, they again decrement data->count and wait for data.gate | 235 | * When they're done, they again decrement data->count and wait for data.gate |
225 | * to be reset. | 236 | * to be set. |
226 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag | 237 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag |
227 | * Everyone then enables interrupts and we all continue on. | 238 | * Everyone then enables interrupts and we all continue on. |
228 | * | 239 | * |
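Editor's aside: a stand-alone sketch of the count/gate hand-shake described above, using C11 atomics and pthreads purely for illustration. The names and the thread model are assumptions, and the MTRR programming itself is elided:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NWORKERS 3

static atomic_int count;
static atomic_int gate;

static void *worker(void *arg)
{
	(void)arg;

	/* 1: announce that the handler started, wait for the gate to open. */
	atomic_fetch_sub(&count, 1);
	while (!atomic_load(&gate))
		;

	/* 2: "interrupts disabled", wait for the master to clear the gate. */
	atomic_fetch_sub(&count, 1);
	while (atomic_load(&gate))
		;

	/* ... each CPU would reprogram its MTRRs here ... */

	/* 3: done, wait for the gate to be set again before "re-enabling". */
	atomic_fetch_sub(&count, 1);
	while (!atomic_load(&gate))
		;
	return NULL;
}

static void wait_and_toggle(int value)
{
	while (atomic_load(&count))
		;
	atomic_store(&count, NWORKERS);	/* reset count before flipping the gate */
	atomic_store(&gate, value);
}

int main(void)
{
	pthread_t tid[NWORKERS];
	int i;

	atomic_store(&count, NWORKERS);
	atomic_store(&gate, 0);
	for (i = 0; i < NWORKERS; i++)
		pthread_create(&tid[i], NULL, worker, NULL);

	wait_and_toggle(1);	/* everyone started: open the gate  */
	wait_and_toggle(0);	/* everyone "masked": do the update */
	/* ... master reprograms its own MTRRs here ... */
	wait_and_toggle(1);	/* everyone updated: release them   */

	for (i = 0; i < NWORKERS; i++)
		pthread_join(tid[i], NULL);
	puts("rendezvous complete");
	return 0;
}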
@@ -234,6 +245,9 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
234 | { | 245 | { |
235 | struct set_mtrr_data data; | 246 | struct set_mtrr_data data; |
236 | unsigned long flags; | 247 | unsigned long flags; |
248 | int cpu; | ||
249 | |||
250 | preempt_disable(); | ||
237 | 251 | ||
238 | data.smp_reg = reg; | 252 | data.smp_reg = reg; |
239 | data.smp_base = base; | 253 | data.smp_base = base; |
@@ -246,10 +260,15 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
246 | atomic_set(&data.gate, 0); | 260 | atomic_set(&data.gate, 0); |
247 | 261 | ||
248 | /* Start the ball rolling on other CPUs */ | 262 | /* Start the ball rolling on other CPUs */ |
249 | if (smp_call_function(ipi_handler, &data, 0) != 0) | 263 | for_each_online_cpu(cpu) { |
250 | panic("mtrr: timed out waiting for other CPUs\n"); | 264 | struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu); |
265 | |||
266 | if (cpu == smp_processor_id()) | ||
267 | continue; | ||
268 | |||
269 | stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work); | ||
270 | } | ||
251 | 271 | ||
252 | local_irq_save(flags); | ||
253 | 272 | ||
254 | while (atomic_read(&data.count)) | 273 | while (atomic_read(&data.count)) |
255 | cpu_relax(); | 274 | cpu_relax(); |
@@ -259,6 +278,16 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
259 | smp_wmb(); | 278 | smp_wmb(); |
260 | atomic_set(&data.gate, 1); | 279 | atomic_set(&data.gate, 1); |
261 | 280 | ||
281 | local_irq_save(flags); | ||
282 | |||
283 | while (atomic_read(&data.count)) | ||
284 | cpu_relax(); | ||
285 | |||
286 | /* Ok, reset count and toggle gate */ | ||
287 | atomic_set(&data.count, num_booting_cpus() - 1); | ||
288 | smp_wmb(); | ||
289 | atomic_set(&data.gate, 0); | ||
290 | |||
262 | /* Do our MTRR business */ | 291 | /* Do our MTRR business */ |
263 | 292 | ||
264 | /* | 293 | /* |
@@ -279,7 +308,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
279 | 308 | ||
280 | atomic_set(&data.count, num_booting_cpus() - 1); | 309 | atomic_set(&data.count, num_booting_cpus() - 1); |
281 | smp_wmb(); | 310 | smp_wmb(); |
282 | atomic_set(&data.gate, 0); | 311 | atomic_set(&data.gate, 1); |
283 | 312 | ||
284 | /* | 313 | /* |
285 | * Wait here for everyone to have seen the gate change | 314 | * Wait here for everyone to have seen the gate change |
@@ -289,6 +318,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
289 | cpu_relax(); | 318 | cpu_relax(); |
290 | 319 | ||
291 | local_irq_restore(flags); | 320 | local_irq_restore(flags); |
321 | preempt_enable(); | ||
292 | } | 322 | } |
293 | 323 | ||
294 | /** | 324 | /** |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index db5bdc8addf8..03a5b0385ad6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -31,46 +31,51 @@ | |||
31 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
32 | #include <asm/compat.h> | 32 | #include <asm/compat.h> |
33 | 33 | ||
34 | static u64 perf_event_mask __read_mostly; | 34 | #if 0 |
35 | #undef wrmsrl | ||
36 | #define wrmsrl(msr, val) \ | ||
37 | do { \ | ||
38 | trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ | ||
39 | (unsigned long)(val)); \ | ||
40 | native_write_msr((msr), (u32)((u64)(val)), \ | ||
41 | (u32)((u64)(val) >> 32)); \ | ||
42 | } while (0) | ||
43 | #endif | ||
35 | 44 | ||
36 | /* The maximal number of PEBS events: */ | 45 | /* |
37 | #define MAX_PEBS_EVENTS 4 | 46 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context |
47 | */ | ||
48 | static unsigned long | ||
49 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
50 | { | ||
51 | unsigned long offset, addr = (unsigned long)from; | ||
52 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
53 | unsigned long size, len = 0; | ||
54 | struct page *page; | ||
55 | void *map; | ||
56 | int ret; | ||
38 | 57 | ||
39 | /* The size of a BTS record in bytes: */ | 58 | do { |
40 | #define BTS_RECORD_SIZE 24 | 59 | ret = __get_user_pages_fast(addr, 1, 0, &page); |
60 | if (!ret) | ||
61 | break; | ||
41 | 62 | ||
42 | /* The size of a per-cpu BTS buffer in bytes: */ | 63 | offset = addr & (PAGE_SIZE - 1); |
43 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) | 64 | size = min(PAGE_SIZE - offset, n - len); |
44 | 65 | ||
45 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | 66 | map = kmap_atomic(page, type); |
46 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) | 67 | memcpy(to, map+offset, size); |
68 | kunmap_atomic(map, type); | ||
69 | put_page(page); | ||
47 | 70 | ||
71 | len += size; | ||
72 | to += size; | ||
73 | addr += size; | ||
48 | 74 | ||
49 | /* | 75 | } while (len < n); |
50 | * Bits in the debugctlmsr controlling branch tracing. | ||
51 | */ | ||
52 | #define X86_DEBUGCTL_TR (1 << 6) | ||
53 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
54 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
55 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
56 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
57 | 76 | ||
58 | /* | 77 | return len; |
59 | * A debug store configuration. | 78 | } |
60 | * | ||
61 | * We only support architectures that use 64bit fields. | ||
62 | */ | ||
63 | struct debug_store { | ||
64 | u64 bts_buffer_base; | ||
65 | u64 bts_index; | ||
66 | u64 bts_absolute_maximum; | ||
67 | u64 bts_interrupt_threshold; | ||
68 | u64 pebs_buffer_base; | ||
69 | u64 pebs_index; | ||
70 | u64 pebs_absolute_maximum; | ||
71 | u64 pebs_interrupt_threshold; | ||
72 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
73 | }; | ||
74 | 79 | ||
75 | struct event_constraint { | 80 | struct event_constraint { |
76 | union { | 81 | union { |
@@ -89,18 +94,43 @@ struct amd_nb { | |||
89 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; | 94 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; |
90 | }; | 95 | }; |
91 | 96 | ||
97 | #define MAX_LBR_ENTRIES 16 | ||
98 | |||
92 | struct cpu_hw_events { | 99 | struct cpu_hw_events { |
100 | /* | ||
101 | * Generic x86 PMC bits | ||
102 | */ | ||
93 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | 103 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
94 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 104 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
95 | unsigned long interrupts; | 105 | unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
96 | int enabled; | 106 | int enabled; |
97 | struct debug_store *ds; | ||
98 | 107 | ||
99 | int n_events; | 108 | int n_events; |
100 | int n_added; | 109 | int n_added; |
110 | int n_txn; | ||
101 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | 111 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ |
102 | u64 tags[X86_PMC_IDX_MAX]; | 112 | u64 tags[X86_PMC_IDX_MAX]; |
103 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | 113 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ |
114 | |||
115 | unsigned int group_flag; | ||
116 | |||
117 | /* | ||
118 | * Intel DebugStore bits | ||
119 | */ | ||
120 | struct debug_store *ds; | ||
121 | u64 pebs_enabled; | ||
122 | |||
123 | /* | ||
124 | * Intel LBR bits | ||
125 | */ | ||
126 | int lbr_users; | ||
127 | void *lbr_context; | ||
128 | struct perf_branch_stack lbr_stack; | ||
129 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | ||
130 | |||
131 | /* | ||
132 | * AMD specific bits | ||
133 | */ | ||
104 | struct amd_nb *amd_nb; | 134 | struct amd_nb *amd_nb; |
105 | }; | 135 | }; |
106 | 136 | ||
@@ -114,44 +144,75 @@ struct cpu_hw_events { | |||
114 | #define EVENT_CONSTRAINT(c, n, m) \ | 144 | #define EVENT_CONSTRAINT(c, n, m) \ |
115 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | 145 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) |
116 | 146 | ||
147 | /* | ||
148 | * Constraint on the Event code. | ||
149 | */ | ||
117 | #define INTEL_EVENT_CONSTRAINT(c, n) \ | 150 | #define INTEL_EVENT_CONSTRAINT(c, n) \ |
118 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) | 151 | EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) |
119 | 152 | ||
153 | /* | ||
154 | * Constraint on the Event code + UMask + fixed-mask | ||
155 | * | ||
156 | * filter mask to validate fixed counter events. | ||
157 | * the following filters disqualify for fixed counters: | ||
158 | * - inv | ||
159 | * - edge | ||
160 | * - cnt-mask | ||
161 | * The other filters are supported by fixed counters. | ||
162 | * The any-thread option is supported starting with v3. | ||
163 | */ | ||
120 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | 164 | #define FIXED_EVENT_CONSTRAINT(c, n) \ |
121 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) | 165 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) |
166 | |||
167 | /* | ||
168 | * Constraint on the Event code + UMask | ||
169 | */ | ||
170 | #define PEBS_EVENT_CONSTRAINT(c, n) \ | ||
171 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) | ||
122 | 172 | ||
123 | #define EVENT_CONSTRAINT_END \ | 173 | #define EVENT_CONSTRAINT_END \ |
124 | EVENT_CONSTRAINT(0, 0, 0) | 174 | EVENT_CONSTRAINT(0, 0, 0) |
125 | 175 | ||
126 | #define for_each_event_constraint(e, c) \ | 176 | #define for_each_event_constraint(e, c) \ |
127 | for ((e) = (c); (e)->cmask; (e)++) | 177 | for ((e) = (c); (e)->weight; (e)++) |
178 | |||
179 | union perf_capabilities { | ||
180 | struct { | ||
181 | u64 lbr_format : 6; | ||
182 | u64 pebs_trap : 1; | ||
183 | u64 pebs_arch_reg : 1; | ||
184 | u64 pebs_format : 4; | ||
185 | u64 smm_freeze : 1; | ||
186 | }; | ||
187 | u64 capabilities; | ||
188 | }; | ||
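/*
 * Editor's aside, not part of the patch: the union above lets one raw
 * capabilities value (presumably read from MSR_IA32_PERF_CAPABILITIES
 * elsewhere in the series) be decoded field by field. A user-space
 * sketch with a fabricated value, since reading the real MSR needs ring 0:
 */
#include <stdint.h>
#include <stdio.h>

union perf_capabilities_sketch {
	struct {
		uint64_t lbr_format    : 6;
		uint64_t pebs_trap     : 1;
		uint64_t pebs_arch_reg : 1;
		uint64_t pebs_format   : 4;
		uint64_t smm_freeze    : 1;
	};
	uint64_t capabilities;
};

int main(void)
{
	union perf_capabilities_sketch cap = { .capabilities = 0x10c2 }; /* made up */

	printf("lbr_format=%u pebs_trap=%u pebs_format=%u\n",
	       (unsigned)cap.lbr_format, (unsigned)cap.pebs_trap,
	       (unsigned)cap.pebs_format);
	return 0;
}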
128 | 189 | ||
129 | /* | 190 | /* |
130 | * struct x86_pmu - generic x86 pmu | 191 | * struct x86_pmu - generic x86 pmu |
131 | */ | 192 | */ |
132 | struct x86_pmu { | 193 | struct x86_pmu { |
194 | /* | ||
195 | * Generic x86 PMC bits | ||
196 | */ | ||
133 | const char *name; | 197 | const char *name; |
134 | int version; | 198 | int version; |
135 | int (*handle_irq)(struct pt_regs *); | 199 | int (*handle_irq)(struct pt_regs *); |
136 | void (*disable_all)(void); | 200 | void (*disable_all)(void); |
137 | void (*enable_all)(void); | 201 | void (*enable_all)(int added); |
138 | void (*enable)(struct perf_event *); | 202 | void (*enable)(struct perf_event *); |
139 | void (*disable)(struct perf_event *); | 203 | void (*disable)(struct perf_event *); |
204 | int (*hw_config)(struct perf_event *event); | ||
205 | int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); | ||
140 | unsigned eventsel; | 206 | unsigned eventsel; |
141 | unsigned perfctr; | 207 | unsigned perfctr; |
142 | u64 (*event_map)(int); | 208 | u64 (*event_map)(int); |
143 | u64 (*raw_event)(u64); | ||
144 | int max_events; | 209 | int max_events; |
145 | int num_events; | 210 | int num_counters; |
146 | int num_events_fixed; | 211 | int num_counters_fixed; |
147 | int event_bits; | 212 | int cntval_bits; |
148 | u64 event_mask; | 213 | u64 cntval_mask; |
149 | int apic; | 214 | int apic; |
150 | u64 max_period; | 215 | u64 max_period; |
151 | u64 intel_ctrl; | ||
152 | void (*enable_bts)(u64 config); | ||
153 | void (*disable_bts)(void); | ||
154 | |||
155 | struct event_constraint * | 216 | struct event_constraint * |
156 | (*get_event_constraints)(struct cpu_hw_events *cpuc, | 217 | (*get_event_constraints)(struct cpu_hw_events *cpuc, |
157 | struct perf_event *event); | 218 | struct perf_event *event); |
@@ -159,11 +220,33 @@ struct x86_pmu { | |||
159 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 220 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
160 | struct perf_event *event); | 221 | struct perf_event *event); |
161 | struct event_constraint *event_constraints; | 222 | struct event_constraint *event_constraints; |
223 | void (*quirks)(void); | ||
224 | int perfctr_second_write; | ||
162 | 225 | ||
163 | int (*cpu_prepare)(int cpu); | 226 | int (*cpu_prepare)(int cpu); |
164 | void (*cpu_starting)(int cpu); | 227 | void (*cpu_starting)(int cpu); |
165 | void (*cpu_dying)(int cpu); | 228 | void (*cpu_dying)(int cpu); |
166 | void (*cpu_dead)(int cpu); | 229 | void (*cpu_dead)(int cpu); |
230 | |||
231 | /* | ||
232 | * Intel Arch Perfmon v2+ | ||
233 | */ | ||
234 | u64 intel_ctrl; | ||
235 | union perf_capabilities intel_cap; | ||
236 | |||
237 | /* | ||
238 | * Intel DebugStore bits | ||
239 | */ | ||
240 | int bts, pebs; | ||
241 | int pebs_record_size; | ||
242 | void (*drain_pebs)(struct pt_regs *regs); | ||
243 | struct event_constraint *pebs_constraints; | ||
244 | |||
245 | /* | ||
246 | * Intel LBR | ||
247 | */ | ||
248 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | ||
249 | int lbr_nr; /* hardware stack size */ | ||
167 | }; | 250 | }; |
168 | 251 | ||
169 | static struct x86_pmu x86_pmu __read_mostly; | 252 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -198,7 +281,7 @@ static u64 | |||
198 | x86_perf_event_update(struct perf_event *event) | 281 | x86_perf_event_update(struct perf_event *event) |
199 | { | 282 | { |
200 | struct hw_perf_event *hwc = &event->hw; | 283 | struct hw_perf_event *hwc = &event->hw; |
201 | int shift = 64 - x86_pmu.event_bits; | 284 | int shift = 64 - x86_pmu.cntval_bits; |
202 | u64 prev_raw_count, new_raw_count; | 285 | u64 prev_raw_count, new_raw_count; |
203 | int idx = hwc->idx; | 286 | int idx = hwc->idx; |
204 | s64 delta; | 287 | s64 delta; |
@@ -214,10 +297,10 @@ x86_perf_event_update(struct perf_event *event) | |||
214 | * count to the generic event atomically: | 297 | * count to the generic event atomically: |
215 | */ | 298 | */ |
216 | again: | 299 | again: |
217 | prev_raw_count = atomic64_read(&hwc->prev_count); | 300 | prev_raw_count = local64_read(&hwc->prev_count); |
218 | rdmsrl(hwc->event_base + idx, new_raw_count); | 301 | rdmsrl(hwc->event_base + idx, new_raw_count); |
219 | 302 | ||
220 | if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, | 303 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, |
221 | new_raw_count) != prev_raw_count) | 304 | new_raw_count) != prev_raw_count) |
222 | goto again; | 305 | goto again; |
223 | 306 | ||
@@ -232,8 +315,8 @@ again: | |||
232 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | 315 | delta = (new_raw_count << shift) - (prev_raw_count << shift); |
233 | delta >>= shift; | 316 | delta >>= shift; |
234 | 317 | ||
235 | atomic64_add(delta, &event->count); | 318 | local64_add(delta, &event->count); |
236 | atomic64_sub(delta, &hwc->period_left); | 319 | local64_sub(delta, &hwc->period_left); |
237 | 320 | ||
238 | return new_raw_count; | 321 | return new_raw_count; |
239 | } | 322 | } |
@@ -241,33 +324,32 @@ again: | |||
241 | static atomic_t active_events; | 324 | static atomic_t active_events; |
242 | static DEFINE_MUTEX(pmc_reserve_mutex); | 325 | static DEFINE_MUTEX(pmc_reserve_mutex); |
243 | 326 | ||
327 | #ifdef CONFIG_X86_LOCAL_APIC | ||
328 | |||
244 | static bool reserve_pmc_hardware(void) | 329 | static bool reserve_pmc_hardware(void) |
245 | { | 330 | { |
246 | #ifdef CONFIG_X86_LOCAL_APIC | ||
247 | int i; | 331 | int i; |
248 | 332 | ||
249 | if (nmi_watchdog == NMI_LOCAL_APIC) | 333 | if (nmi_watchdog == NMI_LOCAL_APIC) |
250 | disable_lapic_nmi_watchdog(); | 334 | disable_lapic_nmi_watchdog(); |
251 | 335 | ||
252 | for (i = 0; i < x86_pmu.num_events; i++) { | 336 | for (i = 0; i < x86_pmu.num_counters; i++) { |
253 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | 337 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) |
254 | goto perfctr_fail; | 338 | goto perfctr_fail; |
255 | } | 339 | } |
256 | 340 | ||
257 | for (i = 0; i < x86_pmu.num_events; i++) { | 341 | for (i = 0; i < x86_pmu.num_counters; i++) { |
258 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | 342 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) |
259 | goto eventsel_fail; | 343 | goto eventsel_fail; |
260 | } | 344 | } |
261 | #endif | ||
262 | 345 | ||
263 | return true; | 346 | return true; |
264 | 347 | ||
265 | #ifdef CONFIG_X86_LOCAL_APIC | ||
266 | eventsel_fail: | 348 | eventsel_fail: |
267 | for (i--; i >= 0; i--) | 349 | for (i--; i >= 0; i--) |
268 | release_evntsel_nmi(x86_pmu.eventsel + i); | 350 | release_evntsel_nmi(x86_pmu.eventsel + i); |
269 | 351 | ||
270 | i = x86_pmu.num_events; | 352 | i = x86_pmu.num_counters; |
271 | 353 | ||
272 | perfctr_fail: | 354 | perfctr_fail: |
273 | for (i--; i >= 0; i--) | 355 | for (i--; i >= 0; i--) |
@@ -277,128 +359,36 @@ perfctr_fail: | |||
277 | enable_lapic_nmi_watchdog(); | 359 | enable_lapic_nmi_watchdog(); |
278 | 360 | ||
279 | return false; | 361 | return false; |
280 | #endif | ||
281 | } | 362 | } |
282 | 363 | ||
283 | static void release_pmc_hardware(void) | 364 | static void release_pmc_hardware(void) |
284 | { | 365 | { |
285 | #ifdef CONFIG_X86_LOCAL_APIC | ||
286 | int i; | 366 | int i; |
287 | 367 | ||
288 | for (i = 0; i < x86_pmu.num_events; i++) { | 368 | for (i = 0; i < x86_pmu.num_counters; i++) { |
289 | release_perfctr_nmi(x86_pmu.perfctr + i); | 369 | release_perfctr_nmi(x86_pmu.perfctr + i); |
290 | release_evntsel_nmi(x86_pmu.eventsel + i); | 370 | release_evntsel_nmi(x86_pmu.eventsel + i); |
291 | } | 371 | } |
292 | 372 | ||
293 | if (nmi_watchdog == NMI_LOCAL_APIC) | 373 | if (nmi_watchdog == NMI_LOCAL_APIC) |
294 | enable_lapic_nmi_watchdog(); | 374 | enable_lapic_nmi_watchdog(); |
295 | #endif | ||
296 | } | 375 | } |
297 | 376 | ||
298 | static inline bool bts_available(void) | 377 | #else |
299 | { | ||
300 | return x86_pmu.enable_bts != NULL; | ||
301 | } | ||
302 | |||
303 | static void init_debug_store_on_cpu(int cpu) | ||
304 | { | ||
305 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
306 | |||
307 | if (!ds) | ||
308 | return; | ||
309 | |||
310 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
311 | (u32)((u64)(unsigned long)ds), | ||
312 | (u32)((u64)(unsigned long)ds >> 32)); | ||
313 | } | ||
314 | |||
315 | static void fini_debug_store_on_cpu(int cpu) | ||
316 | { | ||
317 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
318 | return; | ||
319 | |||
320 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
321 | } | ||
322 | |||
323 | static void release_bts_hardware(void) | ||
324 | { | ||
325 | int cpu; | ||
326 | |||
327 | if (!bts_available()) | ||
328 | return; | ||
329 | |||
330 | get_online_cpus(); | ||
331 | |||
332 | for_each_online_cpu(cpu) | ||
333 | fini_debug_store_on_cpu(cpu); | ||
334 | |||
335 | for_each_possible_cpu(cpu) { | ||
336 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
337 | |||
338 | if (!ds) | ||
339 | continue; | ||
340 | |||
341 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
342 | |||
343 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
344 | kfree(ds); | ||
345 | } | ||
346 | |||
347 | put_online_cpus(); | ||
348 | } | ||
349 | |||
350 | static int reserve_bts_hardware(void) | ||
351 | { | ||
352 | int cpu, err = 0; | ||
353 | |||
354 | if (!bts_available()) | ||
355 | return 0; | ||
356 | |||
357 | get_online_cpus(); | ||
358 | |||
359 | for_each_possible_cpu(cpu) { | ||
360 | struct debug_store *ds; | ||
361 | void *buffer; | ||
362 | |||
363 | err = -ENOMEM; | ||
364 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
365 | if (unlikely(!buffer)) | ||
366 | break; | ||
367 | |||
368 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
369 | if (unlikely(!ds)) { | ||
370 | kfree(buffer); | ||
371 | break; | ||
372 | } | ||
373 | 378 | ||
374 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | 379 | static bool reserve_pmc_hardware(void) { return true; } |
375 | ds->bts_index = ds->bts_buffer_base; | 380 | static void release_pmc_hardware(void) {} |
376 | ds->bts_absolute_maximum = | ||
377 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
378 | ds->bts_interrupt_threshold = | ||
379 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
380 | 381 | ||
381 | per_cpu(cpu_hw_events, cpu).ds = ds; | 382 | #endif |
382 | err = 0; | ||
383 | } | ||
384 | |||
385 | if (err) | ||
386 | release_bts_hardware(); | ||
387 | else { | ||
388 | for_each_online_cpu(cpu) | ||
389 | init_debug_store_on_cpu(cpu); | ||
390 | } | ||
391 | |||
392 | put_online_cpus(); | ||
393 | 383 | ||
394 | return err; | 384 | static int reserve_ds_buffers(void); |
395 | } | 385 | static void release_ds_buffers(void); |
396 | 386 | ||
397 | static void hw_perf_event_destroy(struct perf_event *event) | 387 | static void hw_perf_event_destroy(struct perf_event *event) |
398 | { | 388 | { |
399 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { | 389 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { |
400 | release_pmc_hardware(); | 390 | release_pmc_hardware(); |
401 | release_bts_hardware(); | 391 | release_ds_buffers(); |
402 | mutex_unlock(&pmc_reserve_mutex); | 392 | mutex_unlock(&pmc_reserve_mutex); |
403 | } | 393 | } |
404 | } | 394 | } |
@@ -441,59 +431,16 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
441 | return 0; | 431 | return 0; |
442 | } | 432 | } |
443 | 433 | ||
444 | /* | 434 | static int x86_setup_perfctr(struct perf_event *event) |
445 | * Setup the hardware configuration for a given attr_type | ||
446 | */ | ||
447 | static int __hw_perf_event_init(struct perf_event *event) | ||
448 | { | 435 | { |
449 | struct perf_event_attr *attr = &event->attr; | 436 | struct perf_event_attr *attr = &event->attr; |
450 | struct hw_perf_event *hwc = &event->hw; | 437 | struct hw_perf_event *hwc = &event->hw; |
451 | u64 config; | 438 | u64 config; |
452 | int err; | ||
453 | |||
454 | if (!x86_pmu_initialized()) | ||
455 | return -ENODEV; | ||
456 | |||
457 | err = 0; | ||
458 | if (!atomic_inc_not_zero(&active_events)) { | ||
459 | mutex_lock(&pmc_reserve_mutex); | ||
460 | if (atomic_read(&active_events) == 0) { | ||
461 | if (!reserve_pmc_hardware()) | ||
462 | err = -EBUSY; | ||
463 | else | ||
464 | err = reserve_bts_hardware(); | ||
465 | } | ||
466 | if (!err) | ||
467 | atomic_inc(&active_events); | ||
468 | mutex_unlock(&pmc_reserve_mutex); | ||
469 | } | ||
470 | if (err) | ||
471 | return err; | ||
472 | |||
473 | event->destroy = hw_perf_event_destroy; | ||
474 | |||
475 | /* | ||
476 | * Generate PMC IRQs: | ||
477 | * (keep 'enabled' bit clear for now) | ||
478 | */ | ||
479 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
480 | |||
481 | hwc->idx = -1; | ||
482 | hwc->last_cpu = -1; | ||
483 | hwc->last_tag = ~0ULL; | ||
484 | |||
485 | /* | ||
486 | * Count user and OS events unless requested not to. | ||
487 | */ | ||
488 | if (!attr->exclude_user) | ||
489 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
490 | if (!attr->exclude_kernel) | ||
491 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
492 | 439 | ||
493 | if (!hwc->sample_period) { | 440 | if (!hwc->sample_period) { |
494 | hwc->sample_period = x86_pmu.max_period; | 441 | hwc->sample_period = x86_pmu.max_period; |
495 | hwc->last_period = hwc->sample_period; | 442 | hwc->last_period = hwc->sample_period; |
496 | atomic64_set(&hwc->period_left, hwc->sample_period); | 443 | local64_set(&hwc->period_left, hwc->sample_period); |
497 | } else { | 444 | } else { |
498 | /* | 445 | /* |
499 | * If we have a PMU initialized but no APIC | 446 | * If we have a PMU initialized but no APIC |
@@ -505,16 +452,8 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
505 | return -EOPNOTSUPP; | 452 | return -EOPNOTSUPP; |
506 | } | 453 | } |
507 | 454 | ||
508 | /* | 455 | if (attr->type == PERF_TYPE_RAW) |
509 | * Raw hw_event type provide the config in the hw_event structure | ||
510 | */ | ||
511 | if (attr->type == PERF_TYPE_RAW) { | ||
512 | hwc->config |= x86_pmu.raw_event(attr->config); | ||
513 | if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && | ||
514 | perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
515 | return -EACCES; | ||
516 | return 0; | 456 | return 0; |
517 | } | ||
518 | 457 | ||
519 | if (attr->type == PERF_TYPE_HW_CACHE) | 458 | if (attr->type == PERF_TYPE_HW_CACHE) |
520 | return set_ext_hw_attr(hwc, attr); | 459 | return set_ext_hw_attr(hwc, attr); |
@@ -539,11 +478,11 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
539 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 478 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && |
540 | (hwc->sample_period == 1)) { | 479 | (hwc->sample_period == 1)) { |
541 | /* BTS is not supported by this architecture. */ | 480 | /* BTS is not supported by this architecture. */ |
542 | if (!bts_available()) | 481 | if (!x86_pmu.bts) |
543 | return -EOPNOTSUPP; | 482 | return -EOPNOTSUPP; |
544 | 483 | ||
545 | /* BTS is currently only allowed for user-mode. */ | 484 | /* BTS is currently only allowed for user-mode. */ |
546 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | 485 | if (!attr->exclude_kernel) |
547 | return -EOPNOTSUPP; | 486 | return -EOPNOTSUPP; |
548 | } | 487 | } |
549 | 488 | ||
@@ -552,12 +491,87 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
552 | return 0; | 491 | return 0; |
553 | } | 492 | } |
554 | 493 | ||
494 | static int x86_pmu_hw_config(struct perf_event *event) | ||
495 | { | ||
496 | if (event->attr.precise_ip) { | ||
497 | int precise = 0; | ||
498 | |||
499 | /* Support for constant skid */ | ||
500 | if (x86_pmu.pebs) | ||
501 | precise++; | ||
502 | |||
503 | /* Support for IP fixup */ | ||
504 | if (x86_pmu.lbr_nr) | ||
505 | precise++; | ||
506 | |||
507 | if (event->attr.precise_ip > precise) | ||
508 | return -EOPNOTSUPP; | ||
509 | } | ||
510 | |||
511 | /* | ||
512 | * Generate PMC IRQs: | ||
513 | * (keep 'enabled' bit clear for now) | ||
514 | */ | ||
515 | event->hw.config = ARCH_PERFMON_EVENTSEL_INT; | ||
516 | |||
517 | /* | ||
518 | * Count user and OS events unless requested not to | ||
519 | */ | ||
520 | if (!event->attr.exclude_user) | ||
521 | event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; | ||
522 | if (!event->attr.exclude_kernel) | ||
523 | event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; | ||
524 | |||
525 | if (event->attr.type == PERF_TYPE_RAW) | ||
526 | event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; | ||
527 | |||
528 | return x86_setup_perfctr(event); | ||
529 | } | ||
530 | |||
531 | /* | ||
532 | * Setup the hardware configuration for a given attr_type | ||
533 | */ | ||
534 | static int __hw_perf_event_init(struct perf_event *event) | ||
535 | { | ||
536 | int err; | ||
537 | |||
538 | if (!x86_pmu_initialized()) | ||
539 | return -ENODEV; | ||
540 | |||
541 | err = 0; | ||
542 | if (!atomic_inc_not_zero(&active_events)) { | ||
543 | mutex_lock(&pmc_reserve_mutex); | ||
544 | if (atomic_read(&active_events) == 0) { | ||
545 | if (!reserve_pmc_hardware()) | ||
546 | err = -EBUSY; | ||
547 | else { | ||
548 | err = reserve_ds_buffers(); | ||
549 | if (err) | ||
550 | release_pmc_hardware(); | ||
551 | } | ||
552 | } | ||
553 | if (!err) | ||
554 | atomic_inc(&active_events); | ||
555 | mutex_unlock(&pmc_reserve_mutex); | ||
556 | } | ||
557 | if (err) | ||
558 | return err; | ||
559 | |||
560 | event->destroy = hw_perf_event_destroy; | ||
561 | |||
562 | event->hw.idx = -1; | ||
563 | event->hw.last_cpu = -1; | ||
564 | event->hw.last_tag = ~0ULL; | ||
565 | |||
566 | return x86_pmu.hw_config(event); | ||
567 | } | ||
568 | |||
555 | static void x86_pmu_disable_all(void) | 569 | static void x86_pmu_disable_all(void) |
556 | { | 570 | { |
557 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 571 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
558 | int idx; | 572 | int idx; |
559 | 573 | ||
560 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 574 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
561 | u64 val; | 575 | u64 val; |
562 | 576 | ||
563 | if (!test_bit(idx, cpuc->active_mask)) | 577 | if (!test_bit(idx, cpuc->active_mask)) |
@@ -587,12 +601,12 @@ void hw_perf_disable(void) | |||
587 | x86_pmu.disable_all(); | 601 | x86_pmu.disable_all(); |
588 | } | 602 | } |
589 | 603 | ||
590 | static void x86_pmu_enable_all(void) | 604 | static void x86_pmu_enable_all(int added) |
591 | { | 605 | { |
592 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 606 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
593 | int idx; | 607 | int idx; |
594 | 608 | ||
595 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 609 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
596 | struct perf_event *event = cpuc->events[idx]; | 610 | struct perf_event *event = cpuc->events[idx]; |
597 | u64 val; | 611 | u64 val; |
598 | 612 | ||
@@ -667,14 +681,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
667 | * assign events to counters starting with most | 681 | * assign events to counters starting with most |
668 | * constrained events. | 682 | * constrained events. |
669 | */ | 683 | */ |
670 | wmax = x86_pmu.num_events; | 684 | wmax = x86_pmu.num_counters; |
671 | 685 | ||
672 | /* | 686 | /* |
673 | * when fixed event counters are present, | 687 | * when fixed event counters are present, |
674 | * wmax is incremented by 1 to account | 688 | * wmax is incremented by 1 to account |
675 | * for one more choice | 689 | * for one more choice |
676 | */ | 690 | */ |
677 | if (x86_pmu.num_events_fixed) | 691 | if (x86_pmu.num_counters_fixed) |
678 | wmax++; | 692 | wmax++; |
679 | 693 | ||
680 | for (w = 1, num = n; num && w <= wmax; w++) { | 694 | for (w = 1, num = n; num && w <= wmax; w++) { |
@@ -724,7 +738,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, | |||
724 | struct perf_event *event; | 738 | struct perf_event *event; |
725 | int n, max_count; | 739 | int n, max_count; |
726 | 740 | ||
727 | max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; | 741 | max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; |
728 | 742 | ||
729 | /* current number of events already accepted */ | 743 | /* current number of events already accepted */ |
730 | n = cpuc->n_events; | 744 | n = cpuc->n_events; |
@@ -795,7 +809,7 @@ void hw_perf_enable(void) | |||
795 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 809 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
796 | struct perf_event *event; | 810 | struct perf_event *event; |
797 | struct hw_perf_event *hwc; | 811 | struct hw_perf_event *hwc; |
798 | int i; | 812 | int i, added = cpuc->n_added; |
799 | 813 | ||
800 | if (!x86_pmu_initialized()) | 814 | if (!x86_pmu_initialized()) |
801 | return; | 815 | return; |
@@ -847,19 +861,20 @@ void hw_perf_enable(void) | |||
847 | cpuc->enabled = 1; | 861 | cpuc->enabled = 1; |
848 | barrier(); | 862 | barrier(); |
849 | 863 | ||
850 | x86_pmu.enable_all(); | 864 | x86_pmu.enable_all(added); |
851 | } | 865 | } |
852 | 866 | ||
853 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) | 867 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, |
868 | u64 enable_mask) | ||
854 | { | 869 | { |
855 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 870 | wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); |
856 | hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
857 | } | 871 | } |
858 | 872 | ||
859 | static inline void x86_pmu_disable_event(struct perf_event *event) | 873 | static inline void x86_pmu_disable_event(struct perf_event *event) |
860 | { | 874 | { |
861 | struct hw_perf_event *hwc = &event->hw; | 875 | struct hw_perf_event *hwc = &event->hw; |
862 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); | 876 | |
877 | wrmsrl(hwc->config_base + hwc->idx, hwc->config); | ||
863 | } | 878 | } |
864 | 879 | ||
865 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 880 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
@@ -872,9 +887,9 @@ static int | |||
872 | x86_perf_event_set_period(struct perf_event *event) | 887 | x86_perf_event_set_period(struct perf_event *event) |
873 | { | 888 | { |
874 | struct hw_perf_event *hwc = &event->hw; | 889 | struct hw_perf_event *hwc = &event->hw; |
875 | s64 left = atomic64_read(&hwc->period_left); | 890 | s64 left = local64_read(&hwc->period_left); |
876 | s64 period = hwc->sample_period; | 891 | s64 period = hwc->sample_period; |
877 | int err, ret = 0, idx = hwc->idx; | 892 | int ret = 0, idx = hwc->idx; |
878 | 893 | ||
879 | if (idx == X86_PMC_IDX_FIXED_BTS) | 894 | if (idx == X86_PMC_IDX_FIXED_BTS) |
880 | return 0; | 895 | return 0; |
@@ -884,14 +899,14 @@ x86_perf_event_set_period(struct perf_event *event) | |||
884 | */ | 899 | */ |
885 | if (unlikely(left <= -period)) { | 900 | if (unlikely(left <= -period)) { |
886 | left = period; | 901 | left = period; |
887 | atomic64_set(&hwc->period_left, left); | 902 | local64_set(&hwc->period_left, left); |
888 | hwc->last_period = period; | 903 | hwc->last_period = period; |
889 | ret = 1; | 904 | ret = 1; |
890 | } | 905 | } |
891 | 906 | ||
892 | if (unlikely(left <= 0)) { | 907 | if (unlikely(left <= 0)) { |
893 | left += period; | 908 | left += period; |
894 | atomic64_set(&hwc->period_left, left); | 909 | local64_set(&hwc->period_left, left); |
895 | hwc->last_period = period; | 910 | hwc->last_period = period; |
896 | ret = 1; | 911 | ret = 1; |
897 | } | 912 | } |
@@ -910,10 +925,19 @@ x86_perf_event_set_period(struct perf_event *event) | |||
910 | * The hw event starts counting from this event offset, | 925 | * The hw event starts counting from this event offset, |
911 | * mark it to be able to extra future deltas: | 926 | * mark it to be able to extra future deltas: |
912 | */ | 927 | */ |
913 | atomic64_set(&hwc->prev_count, (u64)-left); | 928 | local64_set(&hwc->prev_count, (u64)-left); |
914 | 929 | ||
915 | err = checking_wrmsrl(hwc->event_base + idx, | 930 | wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); |
916 | (u64)(-left) & x86_pmu.event_mask); | 931 | |
932 | /* | ||
933 | * Due to erratum on certan cpu we need | ||
934 | * a second write to be sure the register | ||
935 | * is updated properly | ||
936 | */ | ||
937 | if (x86_pmu.perfctr_second_write) { | ||
938 | wrmsrl(hwc->event_base + idx, | ||
939 | (u64)(-left) & x86_pmu.cntval_mask); | ||
940 | } | ||
917 | 941 | ||
918 | perf_event_update_userpage(event); | 942 | perf_event_update_userpage(event); |
919 | 943 | ||
@@ -924,7 +948,8 @@ static void x86_pmu_enable_event(struct perf_event *event) | |||
924 | { | 948 | { |
925 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 949 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
926 | if (cpuc->enabled) | 950 | if (cpuc->enabled) |
927 | __x86_pmu_enable_event(&event->hw); | 951 | __x86_pmu_enable_event(&event->hw, |
952 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
928 | } | 953 | } |
929 | 954 | ||
930 | /* | 955 | /* |
@@ -950,7 +975,15 @@ static int x86_pmu_enable(struct perf_event *event) | |||
950 | if (n < 0) | 975 | if (n < 0) |
951 | return n; | 976 | return n; |
952 | 977 | ||
953 | ret = x86_schedule_events(cpuc, n, assign); | 978 | /* |
979 | * If group events scheduling transaction was started, | ||
980 | * skip the schedulability test here, it will be performed | ||
981 | * at commit time(->commit_txn) as a whole | ||
982 | */ | ||
983 | if (cpuc->group_flag & PERF_EVENT_TXN) | ||
984 | goto out; | ||
985 | |||
986 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
954 | if (ret) | 987 | if (ret) |
955 | return ret; | 988 | return ret; |
956 | /* | 989 | /* |
@@ -959,8 +992,10 @@ static int x86_pmu_enable(struct perf_event *event) | |||
959 | */ | 992 | */ |
960 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 993 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
961 | 994 | ||
995 | out: | ||
962 | cpuc->n_events = n; | 996 | cpuc->n_events = n; |
963 | cpuc->n_added += n - n0; | 997 | cpuc->n_added += n - n0; |
998 | cpuc->n_txn += n - n0; | ||
964 | 999 | ||
965 | return 0; | 1000 | return 0; |
966 | } | 1001 | } |
@@ -976,6 +1011,7 @@ static int x86_pmu_start(struct perf_event *event) | |||
976 | x86_perf_event_set_period(event); | 1011 | x86_perf_event_set_period(event); |
977 | cpuc->events[idx] = event; | 1012 | cpuc->events[idx] = event; |
978 | __set_bit(idx, cpuc->active_mask); | 1013 | __set_bit(idx, cpuc->active_mask); |
1014 | __set_bit(idx, cpuc->running); | ||
979 | x86_pmu.enable(event); | 1015 | x86_pmu.enable(event); |
980 | perf_event_update_userpage(event); | 1016 | perf_event_update_userpage(event); |
981 | 1017 | ||
@@ -991,11 +1027,12 @@ static void x86_pmu_unthrottle(struct perf_event *event) | |||
991 | void perf_event_print_debug(void) | 1027 | void perf_event_print_debug(void) |
992 | { | 1028 | { |
993 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | 1029 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; |
1030 | u64 pebs; | ||
994 | struct cpu_hw_events *cpuc; | 1031 | struct cpu_hw_events *cpuc; |
995 | unsigned long flags; | 1032 | unsigned long flags; |
996 | int cpu, idx; | 1033 | int cpu, idx; |
997 | 1034 | ||
998 | if (!x86_pmu.num_events) | 1035 | if (!x86_pmu.num_counters) |
999 | return; | 1036 | return; |
1000 | 1037 | ||
1001 | local_irq_save(flags); | 1038 | local_irq_save(flags); |
@@ -1008,16 +1045,18 @@ void perf_event_print_debug(void) | |||
1008 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 1045 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); |
1009 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | 1046 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); |
1010 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | 1047 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); |
1048 | rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); | ||
1011 | 1049 | ||
1012 | pr_info("\n"); | 1050 | pr_info("\n"); |
1013 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | 1051 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); |
1014 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | 1052 | pr_info("CPU#%d: status: %016llx\n", cpu, status); |
1015 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | 1053 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); |
1016 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | 1054 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); |
1055 | pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); | ||
1017 | } | 1056 | } |
1018 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); | 1057 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
1019 | 1058 | ||
1020 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1059 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1021 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1060 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
1022 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | 1061 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); |
1023 | 1062 | ||
@@ -1030,7 +1069,7 @@ void perf_event_print_debug(void) | |||
1030 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", | 1069 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", |
1031 | cpu, idx, prev_left); | 1070 | cpu, idx, prev_left); |
1032 | } | 1071 | } |
1033 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 1072 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { |
1034 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); | 1073 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); |
1035 | 1074 | ||
1036 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", | 1075 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", |
@@ -1064,6 +1103,14 @@ static void x86_pmu_disable(struct perf_event *event) | |||
1064 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1103 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1065 | int i; | 1104 | int i; |
1066 | 1105 | ||
1106 | /* | ||
1107 | * If we're called during a txn, we don't need to do anything. | ||
1108 | * The events never got scheduled and ->cancel_txn will truncate | ||
1109 | * the event_list. | ||
1110 | */ | ||
1111 | if (cpuc->group_flag & PERF_EVENT_TXN) | ||
1112 | return; | ||
1113 | |||
1067 | x86_pmu_stop(event); | 1114 | x86_pmu_stop(event); |
1068 | 1115 | ||
1069 | for (i = 0; i < cpuc->n_events; i++) { | 1116 | for (i = 0; i < cpuc->n_events; i++) { |
@@ -1095,21 +1142,29 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1095 | 1142 | ||
1096 | cpuc = &__get_cpu_var(cpu_hw_events); | 1143 | cpuc = &__get_cpu_var(cpu_hw_events); |
1097 | 1144 | ||
1098 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1145 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1099 | if (!test_bit(idx, cpuc->active_mask)) | 1146 | if (!test_bit(idx, cpuc->active_mask)) { |
1147 | /* | ||
1148 | * Though we deactivated the counter, some CPUs | ||
1149 | * might still deliver spurious interrupts that are | ||
1150 | * still in flight. Catch them: | ||
1151 | */ | ||
1152 | if (__test_and_clear_bit(idx, cpuc->running)) | ||
1153 | handled++; | ||
1100 | continue; | 1154 | continue; |
1155 | } | ||
1101 | 1156 | ||
1102 | event = cpuc->events[idx]; | 1157 | event = cpuc->events[idx]; |
1103 | hwc = &event->hw; | 1158 | hwc = &event->hw; |
1104 | 1159 | ||
1105 | val = x86_perf_event_update(event); | 1160 | val = x86_perf_event_update(event); |
1106 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | 1161 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) |
1107 | continue; | 1162 | continue; |
1108 | 1163 | ||
1109 | /* | 1164 | /* |
1110 | * event overflow | 1165 | * event overflow |
1111 | */ | 1166 | */ |
1112 | handled = 1; | 1167 | handled++; |
1113 | data.period = event->hw.last_period; | 1168 | data.period = event->hw.last_period; |
1114 | 1169 | ||
1115 | if (!x86_perf_event_set_period(event)) | 1170 | if (!x86_perf_event_set_period(event)) |
@@ -1146,7 +1201,6 @@ void set_perf_event_pending(void) | |||
1146 | 1201 | ||
1147 | void perf_events_lapic_init(void) | 1202 | void perf_events_lapic_init(void) |
1148 | { | 1203 | { |
1149 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1150 | if (!x86_pmu.apic || !x86_pmu_initialized()) | 1204 | if (!x86_pmu.apic || !x86_pmu_initialized()) |
1151 | return; | 1205 | return; |
1152 | 1206 | ||
@@ -1154,15 +1208,22 @@ void perf_events_lapic_init(void) | |||
1154 | * Always use NMI for PMU | 1208 | * Always use NMI for PMU |
1155 | */ | 1209 | */ |
1156 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1210 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1157 | #endif | ||
1158 | } | 1211 | } |
1159 | 1212 | ||
1213 | struct pmu_nmi_state { | ||
1214 | unsigned int marked; | ||
1215 | int handled; | ||
1216 | }; | ||
1217 | |||
1218 | static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi); | ||
1219 | |||
1160 | static int __kprobes | 1220 | static int __kprobes |
1161 | perf_event_nmi_handler(struct notifier_block *self, | 1221 | perf_event_nmi_handler(struct notifier_block *self, |
1162 | unsigned long cmd, void *__args) | 1222 | unsigned long cmd, void *__args) |
1163 | { | 1223 | { |
1164 | struct die_args *args = __args; | 1224 | struct die_args *args = __args; |
1165 | struct pt_regs *regs; | 1225 | unsigned int this_nmi; |
1226 | int handled; | ||
1166 | 1227 | ||
1167 | if (!atomic_read(&active_events)) | 1228 | if (!atomic_read(&active_events)) |
1168 | return NOTIFY_DONE; | 1229 | return NOTIFY_DONE; |
@@ -1171,24 +1232,47 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
1171 | case DIE_NMI: | 1232 | case DIE_NMI: |
1172 | case DIE_NMI_IPI: | 1233 | case DIE_NMI_IPI: |
1173 | break; | 1234 | break; |
1174 | 1235 | case DIE_NMIUNKNOWN: | |
1236 | this_nmi = percpu_read(irq_stat.__nmi_count); | ||
1237 | if (this_nmi != __get_cpu_var(pmu_nmi).marked) | ||
1238 | /* let the kernel handle the unknown nmi */ | ||
1239 | return NOTIFY_DONE; | ||
1240 | /* | ||
1241 | * This one is a PMU back-to-back nmi. Two events | ||
1242 | * trigger 'simultaneously' raising two back-to-back | ||
1243 | * NMIs. If the first NMI handles both, the latter | ||
1244 | * will be empty and daze the CPU. So, we drop it to | ||
1245 | * avoid false-positive 'unknown nmi' messages. | ||
1246 | */ | ||
1247 | return NOTIFY_STOP; | ||
1175 | default: | 1248 | default: |
1176 | return NOTIFY_DONE; | 1249 | return NOTIFY_DONE; |
1177 | } | 1250 | } |
1178 | 1251 | ||
1179 | regs = args->regs; | ||
1180 | |||
1181 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1182 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1252 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1183 | #endif | 1253 | |
1184 | /* | 1254 | handled = x86_pmu.handle_irq(args->regs); |
1185 | * Can't rely on the handled return value to say it was our NMI, two | 1255 | if (!handled) |
1186 | * events could trigger 'simultaneously' raising two back-to-back NMIs. | 1256 | return NOTIFY_DONE; |
1187 | * | 1257 | |
1188 | * If the first NMI handles both, the latter will be empty and daze | 1258 | this_nmi = percpu_read(irq_stat.__nmi_count); |
1189 | * the CPU. | 1259 | if ((handled > 1) || |
1190 | */ | 1260 | /* the next nmi could be a back-to-back nmi */ |
1191 | x86_pmu.handle_irq(regs); | 1261 | ((__get_cpu_var(pmu_nmi).marked == this_nmi) && |
1262 | (__get_cpu_var(pmu_nmi).handled > 1))) { | ||
1263 | /* | ||
1264 | * We could have two subsequent back-to-back nmis: The | ||
1265 | * first handles more than one counter, the 2nd | ||
1266 | * handles only one counter and the 3rd handles no | ||
1267 | * counter. | ||
1268 | * | ||
1269 | * This is the 2nd nmi because the previous was | ||
1270 | * handling more than one counter. We will mark the | ||
1271 | * next (3rd) and then drop it if unhandled. | ||
1272 | */ | ||
1273 | __get_cpu_var(pmu_nmi).marked = this_nmi + 1; | ||
1274 | __get_cpu_var(pmu_nmi).handled = handled; | ||
1275 | } | ||
1192 | 1276 | ||
1193 | return NOTIFY_STOP; | 1277 | return NOTIFY_STOP; |
1194 | } | 1278 | } |
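The back-to-back NMI accounting above is subtle enough to be worth restating in isolation. Below is a minimal, stand-alone sketch (hypothetical names, user-space C, not kernel code) of the same bookkeeping: an NMI that handles more than one counter marks the next NMI number as a likely empty back-to-back PMU NMI, and an otherwise-unknown NMI carrying that number is silently swallowed instead of being reported as an unknown NMI.

#include <stdbool.h>
#include <stdio.h>

struct pmu_nmi_state {
	unsigned int marked;	/* NMI number expected to be a PMU back-to-back NMI */
	int handled;		/* counters handled by the previous PMU NMI */
};

static struct pmu_nmi_state pmu_nmi;
static unsigned int nmi_count;	/* stands in for irq_stat.__nmi_count */

/* Called for an NMI that no handler claimed (the DIE_NMIUNKNOWN case). */
static bool swallow_unknown_nmi(void)
{
	return nmi_count == pmu_nmi.marked;
}

/* Called after the PMU handler serviced 'handled' counters for this NMI. */
static void account_pmu_nmi(int handled)
{
	if (handled > 1 ||
	    (pmu_nmi.marked == nmi_count && pmu_nmi.handled > 1)) {
		/* The next NMI may arrive with nothing left to do: mark it. */
		pmu_nmi.marked = nmi_count + 1;
		pmu_nmi.handled = handled;
	}
}

int main(void)
{
	nmi_count++;		/* first NMI: two counters overflowed together */
	account_pmu_nmi(2);

	nmi_count++;		/* second NMI: already handled, would look "unknown" */
	printf("swallow: %d\n", swallow_unknown_nmi());	/* prints 1 */
	return 0;
}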
@@ -1217,118 +1301,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
1217 | return &unconstrained; | 1301 | return &unconstrained; |
1218 | } | 1302 | } |
1219 | 1303 | ||
1220 | static int x86_event_sched_in(struct perf_event *event, | ||
1221 | struct perf_cpu_context *cpuctx) | ||
1222 | { | ||
1223 | int ret = 0; | ||
1224 | |||
1225 | event->state = PERF_EVENT_STATE_ACTIVE; | ||
1226 | event->oncpu = smp_processor_id(); | ||
1227 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; | ||
1228 | |||
1229 | if (!is_x86_event(event)) | ||
1230 | ret = event->pmu->enable(event); | ||
1231 | |||
1232 | if (!ret && !is_software_event(event)) | ||
1233 | cpuctx->active_oncpu++; | ||
1234 | |||
1235 | if (!ret && event->attr.exclusive) | ||
1236 | cpuctx->exclusive = 1; | ||
1237 | |||
1238 | return ret; | ||
1239 | } | ||
1240 | |||
1241 | static void x86_event_sched_out(struct perf_event *event, | ||
1242 | struct perf_cpu_context *cpuctx) | ||
1243 | { | ||
1244 | event->state = PERF_EVENT_STATE_INACTIVE; | ||
1245 | event->oncpu = -1; | ||
1246 | |||
1247 | if (!is_x86_event(event)) | ||
1248 | event->pmu->disable(event); | ||
1249 | |||
1250 | event->tstamp_running -= event->ctx->time - event->tstamp_stopped; | ||
1251 | |||
1252 | if (!is_software_event(event)) | ||
1253 | cpuctx->active_oncpu--; | ||
1254 | |||
1255 | if (event->attr.exclusive || !cpuctx->active_oncpu) | ||
1256 | cpuctx->exclusive = 0; | ||
1257 | } | ||
1258 | |||
1259 | /* | ||
1260 | * Called to enable a whole group of events. | ||
1261 | * Returns 1 if the group was enabled, or -EAGAIN if it could not be. | ||
1262 | * Assumes the caller has disabled interrupts and has | ||
1263 | * frozen the PMU with hw_perf_save_disable. | ||
1264 | * | ||
1265 | * called with PMU disabled. If successful and return value 1, | ||
1266 | * then guaranteed to call perf_enable() and hw_perf_enable() | ||
1267 | */ | ||
1268 | int hw_perf_group_sched_in(struct perf_event *leader, | ||
1269 | struct perf_cpu_context *cpuctx, | ||
1270 | struct perf_event_context *ctx) | ||
1271 | { | ||
1272 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1273 | struct perf_event *sub; | ||
1274 | int assign[X86_PMC_IDX_MAX]; | ||
1275 | int n0, n1, ret; | ||
1276 | |||
1277 | /* n0 = total number of events */ | ||
1278 | n0 = collect_events(cpuc, leader, true); | ||
1279 | if (n0 < 0) | ||
1280 | return n0; | ||
1281 | |||
1282 | ret = x86_schedule_events(cpuc, n0, assign); | ||
1283 | if (ret) | ||
1284 | return ret; | ||
1285 | |||
1286 | ret = x86_event_sched_in(leader, cpuctx); | ||
1287 | if (ret) | ||
1288 | return ret; | ||
1289 | |||
1290 | n1 = 1; | ||
1291 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
1292 | if (sub->state > PERF_EVENT_STATE_OFF) { | ||
1293 | ret = x86_event_sched_in(sub, cpuctx); | ||
1294 | if (ret) | ||
1295 | goto undo; | ||
1296 | ++n1; | ||
1297 | } | ||
1298 | } | ||
1299 | /* | ||
1300 | * copy new assignment, now we know it is possible | ||
1301 | * will be used by hw_perf_enable() | ||
1302 | */ | ||
1303 | memcpy(cpuc->assign, assign, n0*sizeof(int)); | ||
1304 | |||
1305 | cpuc->n_events = n0; | ||
1306 | cpuc->n_added += n1; | ||
1307 | ctx->nr_active += n1; | ||
1308 | |||
1309 | /* | ||
1310 | * 1 means successful and events are active | ||
1311 | * This is not quite true because we defer | ||
1312 | * actual activation until hw_perf_enable() but | ||
1313 | * this way we ensure the caller won't try to enable | ||
1314 | * individual events | ||
1315 | */ | ||
1316 | return 1; | ||
1317 | undo: | ||
1318 | x86_event_sched_out(leader, cpuctx); | ||
1319 | n0 = 1; | ||
1320 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
1321 | if (sub->state == PERF_EVENT_STATE_ACTIVE) { | ||
1322 | x86_event_sched_out(sub, cpuctx); | ||
1323 | if (++n0 == n1) | ||
1324 | break; | ||
1325 | } | ||
1326 | } | ||
1327 | return ret; | ||
1328 | } | ||
1329 | |||
1330 | #include "perf_event_amd.c" | 1304 | #include "perf_event_amd.c" |
1331 | #include "perf_event_p6.c" | 1305 | #include "perf_event_p6.c" |
1306 | #include "perf_event_p4.c" | ||
1307 | #include "perf_event_intel_lbr.c" | ||
1308 | #include "perf_event_intel_ds.c" | ||
1332 | #include "perf_event_intel.c" | 1309 | #include "perf_event_intel.c" |
1333 | 1310 | ||
1334 | static int __cpuinit | 1311 | static int __cpuinit |
@@ -1402,48 +1379,50 @@ void __init init_hw_perf_events(void) | |||
1402 | 1379 | ||
1403 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1380 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1404 | 1381 | ||
1405 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { | 1382 | if (x86_pmu.quirks) |
1383 | x86_pmu.quirks(); | ||
1384 | |||
1385 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | ||
1406 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1386 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
1407 | x86_pmu.num_events, X86_PMC_MAX_GENERIC); | 1387 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); |
1408 | x86_pmu.num_events = X86_PMC_MAX_GENERIC; | 1388 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; |
1409 | } | 1389 | } |
1410 | perf_event_mask = (1 << x86_pmu.num_events) - 1; | 1390 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
1411 | perf_max_events = x86_pmu.num_events; | 1391 | perf_max_events = x86_pmu.num_counters; |
1412 | 1392 | ||
1413 | if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { | 1393 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { |
1414 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | 1394 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", |
1415 | x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); | 1395 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); |
1416 | x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; | 1396 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; |
1417 | } | 1397 | } |
1418 | 1398 | ||
1419 | perf_event_mask |= | 1399 | x86_pmu.intel_ctrl |= |
1420 | ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; | 1400 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; |
1421 | x86_pmu.intel_ctrl = perf_event_mask; | ||
1422 | 1401 | ||
1423 | perf_events_lapic_init(); | 1402 | perf_events_lapic_init(); |
1424 | register_die_notifier(&perf_event_nmi_notifier); | 1403 | register_die_notifier(&perf_event_nmi_notifier); |
1425 | 1404 | ||
1426 | unconstrained = (struct event_constraint) | 1405 | unconstrained = (struct event_constraint) |
1427 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, | 1406 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
1428 | 0, x86_pmu.num_events); | 1407 | 0, x86_pmu.num_counters); |
1429 | 1408 | ||
1430 | if (x86_pmu.event_constraints) { | 1409 | if (x86_pmu.event_constraints) { |
1431 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1410 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
1432 | if (c->cmask != INTEL_ARCH_FIXED_MASK) | 1411 | if (c->cmask != X86_RAW_EVENT_MASK) |
1433 | continue; | 1412 | continue; |
1434 | 1413 | ||
1435 | c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; | 1414 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; |
1436 | c->weight += x86_pmu.num_events; | 1415 | c->weight += x86_pmu.num_counters; |
1437 | } | 1416 | } |
1438 | } | 1417 | } |
1439 | 1418 | ||
1440 | pr_info("... version: %d\n", x86_pmu.version); | 1419 | pr_info("... version: %d\n", x86_pmu.version); |
1441 | pr_info("... bit width: %d\n", x86_pmu.event_bits); | 1420 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
1442 | pr_info("... generic registers: %d\n", x86_pmu.num_events); | 1421 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
1443 | pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); | 1422 | pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); |
1444 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | 1423 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); |
1445 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); | 1424 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); |
1446 | pr_info("... event mask: %016Lx\n", perf_event_mask); | 1425 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); |
1447 | 1426 | ||
1448 | perf_cpu_notifier(x86_pmu_notifier); | 1427 | perf_cpu_notifier(x86_pmu_notifier); |
1449 | } | 1428 | } |
@@ -1453,6 +1432,67 @@ static inline void x86_pmu_read(struct perf_event *event) | |||
1453 | x86_perf_event_update(event); | 1432 | x86_perf_event_update(event); |
1454 | } | 1433 | } |
1455 | 1434 | ||
1435 | /* | ||
1436 | * Start group events scheduling transaction | ||
1437 | * Set the flag to make pmu::enable() not perform the | ||
1438 | * schedulability test; it will be performed at commit time | ||
1439 | */ | ||
1440 | static void x86_pmu_start_txn(const struct pmu *pmu) | ||
1441 | { | ||
1442 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1443 | |||
1444 | cpuc->group_flag |= PERF_EVENT_TXN; | ||
1445 | cpuc->n_txn = 0; | ||
1446 | } | ||
1447 | |||
1448 | /* | ||
1449 | * Stop group events scheduling transaction | ||
1450 | * Clear the flag and pmu::enable() will perform the | ||
1451 | * schedulability test. | ||
1452 | */ | ||
1453 | static void x86_pmu_cancel_txn(const struct pmu *pmu) | ||
1454 | { | ||
1455 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1456 | |||
1457 | cpuc->group_flag &= ~PERF_EVENT_TXN; | ||
1458 | /* | ||
1459 | * Truncate the collected events. | ||
1460 | */ | ||
1461 | cpuc->n_added -= cpuc->n_txn; | ||
1462 | cpuc->n_events -= cpuc->n_txn; | ||
1463 | } | ||
1464 | |||
1465 | /* | ||
1466 | * Commit group events scheduling transaction | ||
1467 | * Perform the group schedulability test as a whole | ||
1468 | * Return 0 if success | ||
1469 | * Return 0 on success | ||
1470 | static int x86_pmu_commit_txn(const struct pmu *pmu) | ||
1471 | { | ||
1472 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1473 | int assign[X86_PMC_IDX_MAX]; | ||
1474 | int n, ret; | ||
1475 | |||
1476 | n = cpuc->n_events; | ||
1477 | |||
1478 | if (!x86_pmu_initialized()) | ||
1479 | return -EAGAIN; | ||
1480 | |||
1481 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
1482 | if (ret) | ||
1483 | return ret; | ||
1484 | |||
1485 | /* | ||
1486 | * copy new assignment, now we know it is possible | ||
1487 | * will be used by hw_perf_enable() | ||
1488 | */ | ||
1489 | memcpy(cpuc->assign, assign, n*sizeof(int)); | ||
1490 | |||
1491 | cpuc->group_flag &= ~PERF_EVENT_TXN; | ||
1492 | |||
1493 | return 0; | ||
1494 | } | ||
1495 | |||
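For context, the three transaction hooks added above are driven by the generic perf core roughly in the order sketched below. This is a hedged, stand-alone illustration with dummy callbacks (the real caller lives in kernel/perf_event.c and is more involved): start_txn first, then ->enable() for every group member with the per-event schedulability test skipped, then commit_txn, with cancel_txn rolling back on any failure.

#include <stdio.h>

struct pmu {
	void (*start_txn)(const struct pmu *pmu);
	void (*cancel_txn)(const struct pmu *pmu);
	int  (*commit_txn)(const struct pmu *pmu);
	int  (*enable)(const struct pmu *pmu, int event);
};

/* Dummy hooks standing in for x86_pmu_start_txn() and friends. */
static void dummy_start(const struct pmu *pmu)  { puts("start_txn");  }
static void dummy_cancel(const struct pmu *pmu) { puts("cancel_txn"); }
static int  dummy_commit(const struct pmu *pmu) { puts("commit_txn"); return 0; }
static int  dummy_enable(const struct pmu *pmu, int event)
{
	printf("enable event %d (no schedulability test inside a txn)\n", event);
	return 0;
}

/* Rough shape of what the generic layer does for one event group. */
static int group_sched_in(const struct pmu *pmu, const int *events, int n)
{
	int i;

	pmu->start_txn(pmu);

	for (i = 0; i < n; i++) {
		if (pmu->enable(pmu, events[i])) {
			pmu->cancel_txn(pmu);
			return -1;
		}
	}

	if (pmu->commit_txn(pmu)) {
		pmu->cancel_txn(pmu);	/* whole group does not fit: roll back */
		return -1;
	}
	return 0;
}

int main(void)
{
	static const struct pmu pmu = {
		dummy_start, dummy_cancel, dummy_commit, dummy_enable
	};
	int events[] = { 1, 2, 3 };

	return group_sched_in(&pmu, events, 3);
}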
1456 | static const struct pmu pmu = { | 1496 | static const struct pmu pmu = { |
1457 | .enable = x86_pmu_enable, | 1497 | .enable = x86_pmu_enable, |
1458 | .disable = x86_pmu_disable, | 1498 | .disable = x86_pmu_disable, |
@@ -1460,9 +1500,38 @@ static const struct pmu pmu = { | |||
1460 | .stop = x86_pmu_stop, | 1500 | .stop = x86_pmu_stop, |
1461 | .read = x86_pmu_read, | 1501 | .read = x86_pmu_read, |
1462 | .unthrottle = x86_pmu_unthrottle, | 1502 | .unthrottle = x86_pmu_unthrottle, |
1503 | .start_txn = x86_pmu_start_txn, | ||
1504 | .cancel_txn = x86_pmu_cancel_txn, | ||
1505 | .commit_txn = x86_pmu_commit_txn, | ||
1463 | }; | 1506 | }; |
1464 | 1507 | ||
1465 | /* | 1508 | /* |
1509 | * validate that we can schedule this event | ||
1510 | */ | ||
1511 | static int validate_event(struct perf_event *event) | ||
1512 | { | ||
1513 | struct cpu_hw_events *fake_cpuc; | ||
1514 | struct event_constraint *c; | ||
1515 | int ret = 0; | ||
1516 | |||
1517 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
1518 | if (!fake_cpuc) | ||
1519 | return -ENOMEM; | ||
1520 | |||
1521 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | ||
1522 | |||
1523 | if (!c || !c->weight) | ||
1524 | ret = -ENOSPC; | ||
1525 | |||
1526 | if (x86_pmu.put_event_constraints) | ||
1527 | x86_pmu.put_event_constraints(fake_cpuc, event); | ||
1528 | |||
1529 | kfree(fake_cpuc); | ||
1530 | |||
1531 | return ret; | ||
1532 | } | ||
1533 | |||
1534 | /* | ||
1466 | * validate a single event group | 1535 | * validate a single event group |
1467 | * | 1536 | * |
1468 | * validation include: | 1537 | * validation include: |
@@ -1502,7 +1571,7 @@ static int validate_group(struct perf_event *event) | |||
1502 | 1571 | ||
1503 | fake_cpuc->n_events = n; | 1572 | fake_cpuc->n_events = n; |
1504 | 1573 | ||
1505 | ret = x86_schedule_events(fake_cpuc, n, NULL); | 1574 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); |
1506 | 1575 | ||
1507 | out_free: | 1576 | out_free: |
1508 | kfree(fake_cpuc); | 1577 | kfree(fake_cpuc); |
@@ -1527,6 +1596,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1527 | 1596 | ||
1528 | if (event->group_leader != event) | 1597 | if (event->group_leader != event) |
1529 | err = validate_group(event); | 1598 | err = validate_group(event); |
1599 | else | ||
1600 | err = validate_event(event); | ||
1530 | 1601 | ||
1531 | event->pmu = tmp; | 1602 | event->pmu = tmp; |
1532 | } | 1603 | } |
@@ -1574,8 +1645,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
1574 | { | 1645 | { |
1575 | struct perf_callchain_entry *entry = data; | 1646 | struct perf_callchain_entry *entry = data; |
1576 | 1647 | ||
1577 | if (reliable) | 1648 | callchain_store(entry, addr); |
1578 | callchain_store(entry, addr); | ||
1579 | } | 1649 | } |
1580 | 1650 | ||
1581 | static const struct stacktrace_ops backtrace_ops = { | 1651 | static const struct stacktrace_ops backtrace_ops = { |
@@ -1586,8 +1656,6 @@ static const struct stacktrace_ops backtrace_ops = { | |||
1586 | .walk_stack = print_context_stack_bp, | 1656 | .walk_stack = print_context_stack_bp, |
1587 | }; | 1657 | }; |
1588 | 1658 | ||
1589 | #include "../dumpstack.h" | ||
1590 | |||
1591 | static void | 1659 | static void |
1592 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1660 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) |
1593 | { | 1661 | { |
@@ -1597,41 +1665,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1597 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1665 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
1598 | } | 1666 | } |
1599 | 1667 | ||
1600 | /* | ||
1601 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | ||
1602 | */ | ||
1603 | static unsigned long | ||
1604 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
1605 | { | ||
1606 | unsigned long offset, addr = (unsigned long)from; | ||
1607 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
1608 | unsigned long size, len = 0; | ||
1609 | struct page *page; | ||
1610 | void *map; | ||
1611 | int ret; | ||
1612 | |||
1613 | do { | ||
1614 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
1615 | if (!ret) | ||
1616 | break; | ||
1617 | |||
1618 | offset = addr & (PAGE_SIZE - 1); | ||
1619 | size = min(PAGE_SIZE - offset, n - len); | ||
1620 | |||
1621 | map = kmap_atomic(page, type); | ||
1622 | memcpy(to, map+offset, size); | ||
1623 | kunmap_atomic(map, type); | ||
1624 | put_page(page); | ||
1625 | |||
1626 | len += size; | ||
1627 | to += size; | ||
1628 | addr += size; | ||
1629 | |||
1630 | } while (len < n); | ||
1631 | |||
1632 | return len; | ||
1633 | } | ||
1634 | |||
1635 | #ifdef CONFIG_COMPAT | 1668 | #ifdef CONFIG_COMPAT |
1636 | static inline int | 1669 | static inline int |
1637 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1670 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) |
@@ -1727,6 +1760,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1727 | { | 1760 | { |
1728 | struct perf_callchain_entry *entry; | 1761 | struct perf_callchain_entry *entry; |
1729 | 1762 | ||
1763 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
1764 | /* TODO: We don't support guest os callchain now */ | ||
1765 | return NULL; | ||
1766 | } | ||
1767 | |||
1730 | if (in_nmi()) | 1768 | if (in_nmi()) |
1731 | entry = &__get_cpu_var(pmc_nmi_entry); | 1769 | entry = &__get_cpu_var(pmc_nmi_entry); |
1732 | else | 1770 | else |
@@ -1739,14 +1777,36 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1739 | return entry; | 1777 | return entry; |
1740 | } | 1778 | } |
1741 | 1779 | ||
1742 | void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) | 1780 | unsigned long perf_instruction_pointer(struct pt_regs *regs) |
1743 | { | 1781 | { |
1744 | regs->ip = ip; | 1782 | unsigned long ip; |
1745 | /* | 1783 | |
1746 | * perf_arch_fetch_caller_regs adds another call, we need to increment | 1784 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) |
1747 | * the skip level | 1785 | ip = perf_guest_cbs->get_guest_ip(); |
1748 | */ | 1786 | else |
1749 | regs->bp = rewind_frame_pointer(skip + 1); | 1787 | ip = instruction_pointer(regs); |
1750 | regs->cs = __KERNEL_CS; | 1788 | |
1751 | local_save_flags(regs->flags); | 1789 | return ip; |
1790 | } | ||
1791 | |||
1792 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
1793 | { | ||
1794 | int misc = 0; | ||
1795 | |||
1796 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
1797 | if (perf_guest_cbs->is_user_mode()) | ||
1798 | misc |= PERF_RECORD_MISC_GUEST_USER; | ||
1799 | else | ||
1800 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | ||
1801 | } else { | ||
1802 | if (user_mode(regs)) | ||
1803 | misc |= PERF_RECORD_MISC_USER; | ||
1804 | else | ||
1805 | misc |= PERF_RECORD_MISC_KERNEL; | ||
1806 | } | ||
1807 | |||
1808 | if (regs->flags & PERF_EFLAGS_EXACT) | ||
1809 | misc |= PERF_RECORD_MISC_EXACT_IP; | ||
1810 | |||
1811 | return misc; | ||
1752 | } | 1812 | } |
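perf_guest_cbs, consulted by perf_instruction_pointer() and perf_misc_flags() above, stays NULL until a hypervisor registers its callbacks. A rough sketch of such a registration follows; the module boilerplate and my_* names are hypothetical stand-ins, while the callback structure and register/unregister helpers are the ones these functions rely on.

#include <linux/module.h>
#include <linux/perf_event.h>

/* Stand-in callbacks; a real hypervisor would query its vCPU state here. */
static int my_is_in_guest(void)
{
	return 0;		/* e.g. "a vCPU is running on this CPU right now" */
}

static int my_is_user_mode(void)
{
	return 0;		/* guest was at CPL 3 when the PMI hit */
}

static unsigned long my_get_guest_ip(void)
{
	return 0;		/* guest instruction pointer at PMI time */
}

static struct perf_guest_info_callbacks my_guest_cbs = {
	.is_in_guest	= my_is_in_guest,
	.is_user_mode	= my_is_user_mode,
	.get_guest_ip	= my_get_guest_ip,
};

static int __init my_guest_cbs_init(void)
{
	return perf_register_guest_info_callbacks(&my_guest_cbs);
}

static void __exit my_guest_cbs_exit(void)
{
	perf_unregister_guest_info_callbacks(&my_guest_cbs);
}

module_init(my_guest_cbs_init);
module_exit(my_guest_cbs_exit);
MODULE_LICENSE("GPL");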
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index db6f7d4056e1..c2897b7b4a3b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -2,7 +2,7 @@ | |||
2 | 2 | ||
3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); | 3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); |
4 | 4 | ||
5 | static __initconst u64 amd_hw_cache_event_ids | 5 | static __initconst const u64 amd_hw_cache_event_ids |
6 | [PERF_COUNT_HW_CACHE_MAX] | 6 | [PERF_COUNT_HW_CACHE_MAX] |
7 | [PERF_COUNT_HW_CACHE_OP_MAX] | 7 | [PERF_COUNT_HW_CACHE_OP_MAX] |
8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -102,8 +102,8 @@ static const u64 amd_perfmon_event_map[] = | |||
102 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | 102 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, |
103 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | 103 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, |
104 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | 104 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, |
105 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | 105 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, |
106 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | 106 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, |
107 | }; | 107 | }; |
108 | 108 | ||
109 | static u64 amd_pmu_event_map(int hw_event) | 109 | static u64 amd_pmu_event_map(int hw_event) |
@@ -111,22 +111,19 @@ static u64 amd_pmu_event_map(int hw_event) | |||
111 | return amd_perfmon_event_map[hw_event]; | 111 | return amd_perfmon_event_map[hw_event]; |
112 | } | 112 | } |
113 | 113 | ||
114 | static u64 amd_pmu_raw_event(u64 hw_event) | 114 | static int amd_pmu_hw_config(struct perf_event *event) |
115 | { | 115 | { |
116 | #define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL | 116 | int ret = x86_pmu_hw_config(event); |
117 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | 117 | |
118 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | 118 | if (ret) |
119 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | 119 | return ret; |
120 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | 120 | |
121 | 121 | if (event->attr.type != PERF_TYPE_RAW) | |
122 | #define K7_EVNTSEL_MASK \ | 122 | return 0; |
123 | (K7_EVNTSEL_EVENT_MASK | \ | 123 | |
124 | K7_EVNTSEL_UNIT_MASK | \ | 124 | event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; |
125 | K7_EVNTSEL_EDGE_MASK | \ | 125 | |
126 | K7_EVNTSEL_INV_MASK | \ | 126 | return 0; |
127 | K7_EVNTSEL_REG_MASK) | ||
128 | |||
129 | return hw_event & K7_EVNTSEL_MASK; | ||
130 | } | 127 | } |
131 | 128 | ||
132 | /* | 129 | /* |
@@ -165,7 +162,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | |||
165 | * be removed on one CPU at a time AND PMU is disabled | 162 | * be removed on one CPU at a time AND PMU is disabled |
166 | * when we come here | 163 | * when we come here |
167 | */ | 164 | */ |
168 | for (i = 0; i < x86_pmu.num_events; i++) { | 165 | for (i = 0; i < x86_pmu.num_counters; i++) { |
169 | if (nb->owners[i] == event) { | 166 | if (nb->owners[i] == event) { |
170 | cmpxchg(nb->owners+i, event, NULL); | 167 | cmpxchg(nb->owners+i, event, NULL); |
171 | break; | 168 | break; |
@@ -215,7 +212,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
215 | struct hw_perf_event *hwc = &event->hw; | 212 | struct hw_perf_event *hwc = &event->hw; |
216 | struct amd_nb *nb = cpuc->amd_nb; | 213 | struct amd_nb *nb = cpuc->amd_nb; |
217 | struct perf_event *old = NULL; | 214 | struct perf_event *old = NULL; |
218 | int max = x86_pmu.num_events; | 215 | int max = x86_pmu.num_counters; |
219 | int i, j, k = -1; | 216 | int i, j, k = -1; |
220 | 217 | ||
221 | /* | 218 | /* |
@@ -293,7 +290,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | |||
293 | /* | 290 | /* |
294 | * initialize all possible NB constraints | 291 | * initialize all possible NB constraints |
295 | */ | 292 | */ |
296 | for (i = 0; i < x86_pmu.num_events; i++) { | 293 | for (i = 0; i < x86_pmu.num_counters; i++) { |
297 | __set_bit(i, nb->event_constraints[i].idxmsk); | 294 | __set_bit(i, nb->event_constraints[i].idxmsk); |
298 | nb->event_constraints[i].weight = 1; | 295 | nb->event_constraints[i].weight = 1; |
299 | } | 296 | } |
@@ -371,21 +368,22 @@ static void amd_pmu_cpu_dead(int cpu) | |||
371 | raw_spin_unlock(&amd_nb_lock); | 368 | raw_spin_unlock(&amd_nb_lock); |
372 | } | 369 | } |
373 | 370 | ||
374 | static __initconst struct x86_pmu amd_pmu = { | 371 | static __initconst const struct x86_pmu amd_pmu = { |
375 | .name = "AMD", | 372 | .name = "AMD", |
376 | .handle_irq = x86_pmu_handle_irq, | 373 | .handle_irq = x86_pmu_handle_irq, |
377 | .disable_all = x86_pmu_disable_all, | 374 | .disable_all = x86_pmu_disable_all, |
378 | .enable_all = x86_pmu_enable_all, | 375 | .enable_all = x86_pmu_enable_all, |
379 | .enable = x86_pmu_enable_event, | 376 | .enable = x86_pmu_enable_event, |
380 | .disable = x86_pmu_disable_event, | 377 | .disable = x86_pmu_disable_event, |
378 | .hw_config = amd_pmu_hw_config, | ||
379 | .schedule_events = x86_schedule_events, | ||
381 | .eventsel = MSR_K7_EVNTSEL0, | 380 | .eventsel = MSR_K7_EVNTSEL0, |
382 | .perfctr = MSR_K7_PERFCTR0, | 381 | .perfctr = MSR_K7_PERFCTR0, |
383 | .event_map = amd_pmu_event_map, | 382 | .event_map = amd_pmu_event_map, |
384 | .raw_event = amd_pmu_raw_event, | ||
385 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | 383 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), |
386 | .num_events = 4, | 384 | .num_counters = 4, |
387 | .event_bits = 48, | 385 | .cntval_bits = 48, |
388 | .event_mask = (1ULL << 48) - 1, | 386 | .cntval_mask = (1ULL << 48) - 1, |
389 | .apic = 1, | 387 | .apic = 1, |
390 | /* use highest bit to detect overflow */ | 388 | /* use highest bit to detect overflow */ |
391 | .max_period = (1ULL << 47) - 1, | 389 | .max_period = (1ULL << 47) - 1, |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9c794ac87837..ee05c90012d2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -72,6 +72,7 @@ static struct event_constraint intel_westmere_event_constraints[] = | |||
72 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | 72 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ |
73 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ | 73 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ |
74 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | 74 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ |
75 | INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */ | ||
75 | EVENT_CONSTRAINT_END | 76 | EVENT_CONSTRAINT_END |
76 | }; | 77 | }; |
77 | 78 | ||
@@ -88,7 +89,7 @@ static u64 intel_pmu_event_map(int hw_event) | |||
88 | return intel_perfmon_event_map[hw_event]; | 89 | return intel_perfmon_event_map[hw_event]; |
89 | } | 90 | } |
90 | 91 | ||
91 | static __initconst u64 westmere_hw_cache_event_ids | 92 | static __initconst const u64 westmere_hw_cache_event_ids |
92 | [PERF_COUNT_HW_CACHE_MAX] | 93 | [PERF_COUNT_HW_CACHE_MAX] |
93 | [PERF_COUNT_HW_CACHE_OP_MAX] | 94 | [PERF_COUNT_HW_CACHE_OP_MAX] |
94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 95 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -179,7 +180,7 @@ static __initconst u64 westmere_hw_cache_event_ids | |||
179 | }, | 180 | }, |
180 | }; | 181 | }; |
181 | 182 | ||
182 | static __initconst u64 nehalem_hw_cache_event_ids | 183 | static __initconst const u64 nehalem_hw_cache_event_ids |
183 | [PERF_COUNT_HW_CACHE_MAX] | 184 | [PERF_COUNT_HW_CACHE_MAX] |
184 | [PERF_COUNT_HW_CACHE_OP_MAX] | 185 | [PERF_COUNT_HW_CACHE_OP_MAX] |
185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 186 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -270,7 +271,7 @@ static __initconst u64 nehalem_hw_cache_event_ids | |||
270 | }, | 271 | }, |
271 | }; | 272 | }; |
272 | 273 | ||
273 | static __initconst u64 core2_hw_cache_event_ids | 274 | static __initconst const u64 core2_hw_cache_event_ids |
274 | [PERF_COUNT_HW_CACHE_MAX] | 275 | [PERF_COUNT_HW_CACHE_MAX] |
275 | [PERF_COUNT_HW_CACHE_OP_MAX] | 276 | [PERF_COUNT_HW_CACHE_OP_MAX] |
276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 277 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -361,7 +362,7 @@ static __initconst u64 core2_hw_cache_event_ids | |||
361 | }, | 362 | }, |
362 | }; | 363 | }; |
363 | 364 | ||
364 | static __initconst u64 atom_hw_cache_event_ids | 365 | static __initconst const u64 atom_hw_cache_event_ids |
365 | [PERF_COUNT_HW_CACHE_MAX] | 366 | [PERF_COUNT_HW_CACHE_MAX] |
366 | [PERF_COUNT_HW_CACHE_OP_MAX] | 367 | [PERF_COUNT_HW_CACHE_OP_MAX] |
367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 368 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -452,60 +453,6 @@ static __initconst u64 atom_hw_cache_event_ids | |||
452 | }, | 453 | }, |
453 | }; | 454 | }; |
454 | 455 | ||
455 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
456 | { | ||
457 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
458 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
459 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
460 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
461 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
462 | |||
463 | #define CORE_EVNTSEL_MASK \ | ||
464 | (INTEL_ARCH_EVTSEL_MASK | \ | ||
465 | INTEL_ARCH_UNIT_MASK | \ | ||
466 | INTEL_ARCH_EDGE_MASK | \ | ||
467 | INTEL_ARCH_INV_MASK | \ | ||
468 | INTEL_ARCH_CNT_MASK) | ||
469 | |||
470 | return hw_event & CORE_EVNTSEL_MASK; | ||
471 | } | ||
472 | |||
473 | static void intel_pmu_enable_bts(u64 config) | ||
474 | { | ||
475 | unsigned long debugctlmsr; | ||
476 | |||
477 | debugctlmsr = get_debugctlmsr(); | ||
478 | |||
479 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
480 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
481 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
482 | |||
483 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
484 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
485 | |||
486 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
487 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
488 | |||
489 | update_debugctlmsr(debugctlmsr); | ||
490 | } | ||
491 | |||
492 | static void intel_pmu_disable_bts(void) | ||
493 | { | ||
494 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
495 | unsigned long debugctlmsr; | ||
496 | |||
497 | if (!cpuc->ds) | ||
498 | return; | ||
499 | |||
500 | debugctlmsr = get_debugctlmsr(); | ||
501 | |||
502 | debugctlmsr &= | ||
503 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
504 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
505 | |||
506 | update_debugctlmsr(debugctlmsr); | ||
507 | } | ||
508 | |||
509 | static void intel_pmu_disable_all(void) | 456 | static void intel_pmu_disable_all(void) |
510 | { | 457 | { |
511 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 458 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -514,12 +461,17 @@ static void intel_pmu_disable_all(void) | |||
514 | 461 | ||
515 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | 462 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
516 | intel_pmu_disable_bts(); | 463 | intel_pmu_disable_bts(); |
464 | |||
465 | intel_pmu_pebs_disable_all(); | ||
466 | intel_pmu_lbr_disable_all(); | ||
517 | } | 467 | } |
518 | 468 | ||
519 | static void intel_pmu_enable_all(void) | 469 | static void intel_pmu_enable_all(int added) |
520 | { | 470 | { |
521 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 471 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
522 | 472 | ||
473 | intel_pmu_pebs_enable_all(); | ||
474 | intel_pmu_lbr_enable_all(); | ||
523 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 475 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
524 | 476 | ||
525 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 477 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
@@ -533,6 +485,87 @@ static void intel_pmu_enable_all(void) | |||
533 | } | 485 | } |
534 | } | 486 | } |
535 | 487 | ||
488 | /* | ||
489 | * Workaround for: | ||
490 | * Intel Errata AAK100 (model 26) | ||
491 | * Intel Errata AAP53 (model 30) | ||
492 | * Intel Errata BD53 (model 44) | ||
493 | * | ||
494 | * The official story: | ||
495 | * These chips need to be 'reset' when adding counters by programming the | ||
496 | * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either | ||
497 | * in sequence on the same PMC or on different PMCs. | ||
498 | * | ||
499 | * In practice it appears some of these events do in fact count, and | ||
500 | * we need to program all 4 events. | ||
501 | */ | ||
502 | static void intel_pmu_nhm_workaround(void) | ||
503 | { | ||
504 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
505 | static const unsigned long nhm_magic[4] = { | ||
506 | 0x4300B5, | ||
507 | 0x4300D2, | ||
508 | 0x4300B1, | ||
509 | 0x4300B1 | ||
510 | }; | ||
511 | struct perf_event *event; | ||
512 | int i; | ||
513 | |||
514 | /* | ||
515 | * The errata requires the following steps: | ||
516 | * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL; | ||
517 | * 2) Configure 4 PERFEVTSELx with the magic events and clear | ||
518 | * the corresponding PMCx; | ||
519 | * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL; | ||
520 | * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL; | ||
521 | * 5) Clear 4 pairs of PERFEVTSELx and PMCx; | ||
522 | */ | ||
523 | |||
524 | /* | ||
525 | * The real steps we choose are a little different from above. | ||
526 | * A) To reduce MSR operations, we don't run step 1) as they | ||
527 | * are already cleared before this function is called; | ||
528 | * B) Call x86_perf_event_update to save PMCx before configuring | ||
529 | * PERFEVTSELx with magic number; | ||
530 | * C) With step 5), we do clear only when the PERFEVTSELx is | ||
531 | * not used currently. | ||
532 | * D) Call x86_perf_event_set_period to restore PMCx; | ||
533 | */ | ||
534 | |||
535 | /* We always operate 4 pairs of PERF Counters */ | ||
536 | for (i = 0; i < 4; i++) { | ||
537 | event = cpuc->events[i]; | ||
538 | if (event) | ||
539 | x86_perf_event_update(event); | ||
540 | } | ||
541 | |||
542 | for (i = 0; i < 4; i++) { | ||
543 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); | ||
544 | wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); | ||
545 | } | ||
546 | |||
547 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); | ||
548 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); | ||
549 | |||
550 | for (i = 0; i < 4; i++) { | ||
551 | event = cpuc->events[i]; | ||
552 | |||
553 | if (event) { | ||
554 | x86_perf_event_set_period(event); | ||
555 | __x86_pmu_enable_event(&event->hw, | ||
556 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
557 | } else | ||
558 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); | ||
559 | } | ||
560 | } | ||
561 | |||
562 | static void intel_pmu_nhm_enable_all(int added) | ||
563 | { | ||
564 | if (added) | ||
565 | intel_pmu_nhm_workaround(); | ||
566 | intel_pmu_enable_all(added); | ||
567 | } | ||
568 | |||
536 | static inline u64 intel_pmu_get_status(void) | 569 | static inline u64 intel_pmu_get_status(void) |
537 | { | 570 | { |
538 | u64 status; | 571 | u64 status; |
@@ -547,8 +580,7 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
547 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | 580 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); |
548 | } | 581 | } |
549 | 582 | ||
550 | static inline void | 583 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) |
551 | intel_pmu_disable_fixed(struct hw_perf_event *hwc) | ||
552 | { | 584 | { |
553 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 585 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
554 | u64 ctrl_val, mask; | 586 | u64 ctrl_val, mask; |
@@ -557,71 +589,10 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc) | |||
557 | 589 | ||
558 | rdmsrl(hwc->config_base, ctrl_val); | 590 | rdmsrl(hwc->config_base, ctrl_val); |
559 | ctrl_val &= ~mask; | 591 | ctrl_val &= ~mask; |
560 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | 592 | wrmsrl(hwc->config_base, ctrl_val); |
561 | } | ||
562 | |||
563 | static void intel_pmu_drain_bts_buffer(void) | ||
564 | { | ||
565 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
566 | struct debug_store *ds = cpuc->ds; | ||
567 | struct bts_record { | ||
568 | u64 from; | ||
569 | u64 to; | ||
570 | u64 flags; | ||
571 | }; | ||
572 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
573 | struct bts_record *at, *top; | ||
574 | struct perf_output_handle handle; | ||
575 | struct perf_event_header header; | ||
576 | struct perf_sample_data data; | ||
577 | struct pt_regs regs; | ||
578 | |||
579 | if (!event) | ||
580 | return; | ||
581 | |||
582 | if (!ds) | ||
583 | return; | ||
584 | |||
585 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
586 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
587 | |||
588 | if (top <= at) | ||
589 | return; | ||
590 | |||
591 | ds->bts_index = ds->bts_buffer_base; | ||
592 | |||
593 | perf_sample_data_init(&data, 0); | ||
594 | |||
595 | data.period = event->hw.last_period; | ||
596 | regs.ip = 0; | ||
597 | |||
598 | /* | ||
599 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
600 | * We will overwrite the from and to address before we output | ||
601 | * the sample. | ||
602 | */ | ||
603 | perf_prepare_sample(&header, &data, event, ®s); | ||
604 | |||
605 | if (perf_output_begin(&handle, event, | ||
606 | header.size * (top - at), 1, 1)) | ||
607 | return; | ||
608 | |||
609 | for (; at < top; at++) { | ||
610 | data.ip = at->from; | ||
611 | data.addr = at->to; | ||
612 | |||
613 | perf_output_sample(&handle, &header, &data, event); | ||
614 | } | ||
615 | |||
616 | perf_output_end(&handle); | ||
617 | |||
618 | /* There's new data available. */ | ||
619 | event->hw.interrupts++; | ||
620 | event->pending_kill = POLL_IN; | ||
621 | } | 593 | } |
622 | 594 | ||
623 | static inline void | 595 | static void intel_pmu_disable_event(struct perf_event *event) |
624 | intel_pmu_disable_event(struct perf_event *event) | ||
625 | { | 596 | { |
626 | struct hw_perf_event *hwc = &event->hw; | 597 | struct hw_perf_event *hwc = &event->hw; |
627 | 598 | ||
@@ -637,14 +608,15 @@ intel_pmu_disable_event(struct perf_event *event) | |||
637 | } | 608 | } |
638 | 609 | ||
639 | x86_pmu_disable_event(event); | 610 | x86_pmu_disable_event(event); |
611 | |||
612 | if (unlikely(event->attr.precise_ip)) | ||
613 | intel_pmu_pebs_disable(event); | ||
640 | } | 614 | } |
641 | 615 | ||
642 | static inline void | 616 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) |
643 | intel_pmu_enable_fixed(struct hw_perf_event *hwc) | ||
644 | { | 617 | { |
645 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 618 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
646 | u64 ctrl_val, bits, mask; | 619 | u64 ctrl_val, bits, mask; |
647 | int err; | ||
648 | 620 | ||
649 | /* | 621 | /* |
650 | * Enable IRQ generation (0x8), | 622 | * Enable IRQ generation (0x8), |
@@ -669,7 +641,7 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc) | |||
669 | rdmsrl(hwc->config_base, ctrl_val); | 641 | rdmsrl(hwc->config_base, ctrl_val); |
670 | ctrl_val &= ~mask; | 642 | ctrl_val &= ~mask; |
671 | ctrl_val |= bits; | 643 | ctrl_val |= bits; |
672 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | 644 | wrmsrl(hwc->config_base, ctrl_val); |
673 | } | 645 | } |
674 | 646 | ||
675 | static void intel_pmu_enable_event(struct perf_event *event) | 647 | static void intel_pmu_enable_event(struct perf_event *event) |
@@ -689,7 +661,10 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
689 | return; | 661 | return; |
690 | } | 662 | } |
691 | 663 | ||
692 | __x86_pmu_enable_event(hwc); | 664 | if (unlikely(event->attr.precise_ip)) |
665 | intel_pmu_pebs_enable(event); | ||
666 | |||
667 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); | ||
693 | } | 668 | } |
694 | 669 | ||
695 | /* | 670 | /* |
@@ -708,20 +683,20 @@ static void intel_pmu_reset(void) | |||
708 | unsigned long flags; | 683 | unsigned long flags; |
709 | int idx; | 684 | int idx; |
710 | 685 | ||
711 | if (!x86_pmu.num_events) | 686 | if (!x86_pmu.num_counters) |
712 | return; | 687 | return; |
713 | 688 | ||
714 | local_irq_save(flags); | 689 | local_irq_save(flags); |
715 | 690 | ||
716 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | 691 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); |
717 | 692 | ||
718 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 693 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
719 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | 694 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); |
720 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | 695 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); |
721 | } | 696 | } |
722 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 697 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) |
723 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 698 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
724 | } | 699 | |
725 | if (ds) | 700 | if (ds) |
726 | ds->bts_index = ds->bts_buffer_base; | 701 | ds->bts_index = ds->bts_buffer_base; |
727 | 702 | ||
@@ -737,7 +712,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
737 | struct perf_sample_data data; | 712 | struct perf_sample_data data; |
738 | struct cpu_hw_events *cpuc; | 713 | struct cpu_hw_events *cpuc; |
739 | int bit, loops; | 714 | int bit, loops; |
740 | u64 ack, status; | 715 | u64 status; |
716 | int handled = 0; | ||
741 | 717 | ||
742 | perf_sample_data_init(&data, 0); | 718 | perf_sample_data_init(&data, 0); |
743 | 719 | ||
@@ -747,12 +723,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
747 | intel_pmu_drain_bts_buffer(); | 723 | intel_pmu_drain_bts_buffer(); |
748 | status = intel_pmu_get_status(); | 724 | status = intel_pmu_get_status(); |
749 | if (!status) { | 725 | if (!status) { |
750 | intel_pmu_enable_all(); | 726 | intel_pmu_enable_all(0); |
751 | return 0; | 727 | return 0; |
752 | } | 728 | } |
753 | 729 | ||
754 | loops = 0; | 730 | loops = 0; |
755 | again: | 731 | again: |
732 | intel_pmu_ack_status(status); | ||
756 | if (++loops > 100) { | 733 | if (++loops > 100) { |
757 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | 734 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); |
758 | perf_event_print_debug(); | 735 | perf_event_print_debug(); |
@@ -761,10 +738,22 @@ again: | |||
761 | } | 738 | } |
762 | 739 | ||
763 | inc_irq_stat(apic_perf_irqs); | 740 | inc_irq_stat(apic_perf_irqs); |
764 | ack = status; | 741 | |
742 | intel_pmu_lbr_read(); | ||
743 | |||
744 | /* | ||
745 | * PEBS overflow sets bit 62 in the global status register | ||
746 | */ | ||
747 | if (__test_and_clear_bit(62, (unsigned long *)&status)) { | ||
748 | handled++; | ||
749 | x86_pmu.drain_pebs(regs); | ||
750 | } | ||
751 | |||
765 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | 752 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
766 | struct perf_event *event = cpuc->events[bit]; | 753 | struct perf_event *event = cpuc->events[bit]; |
767 | 754 | ||
755 | handled++; | ||
756 | |||
768 | if (!test_bit(bit, cpuc->active_mask)) | 757 | if (!test_bit(bit, cpuc->active_mask)) |
769 | continue; | 758 | continue; |
770 | 759 | ||
@@ -777,8 +766,6 @@ again: | |||
777 | x86_pmu_stop(event); | 766 | x86_pmu_stop(event); |
778 | } | 767 | } |
779 | 768 | ||
780 | intel_pmu_ack_status(ack); | ||
781 | |||
782 | /* | 769 | /* |
783 | * Repeat if there is more work to be done: | 770 | * Repeat if there is more work to be done: |
784 | */ | 771 | */ |
@@ -787,26 +774,22 @@ again: | |||
787 | goto again; | 774 | goto again; |
788 | 775 | ||
789 | done: | 776 | done: |
790 | intel_pmu_enable_all(); | 777 | intel_pmu_enable_all(0); |
791 | return 1; | 778 | return handled; |
792 | } | 779 | } |
793 | 780 | ||
794 | static struct event_constraint bts_constraint = | ||
795 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
796 | |||
797 | static struct event_constraint * | 781 | static struct event_constraint * |
798 | intel_special_constraints(struct perf_event *event) | 782 | intel_bts_constraints(struct perf_event *event) |
799 | { | 783 | { |
800 | unsigned int hw_event; | 784 | struct hw_perf_event *hwc = &event->hw; |
801 | 785 | unsigned int hw_event, bts_event; | |
802 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
803 | 786 | ||
804 | if (unlikely((hw_event == | 787 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
805 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | 788 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
806 | (event->hw.sample_period == 1))) { | ||
807 | 789 | ||
790 | if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) | ||
808 | return &bts_constraint; | 791 | return &bts_constraint; |
809 | } | 792 | |
810 | return NULL; | 793 | return NULL; |
811 | } | 794 | } |
812 | 795 | ||
@@ -815,24 +798,53 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
815 | { | 798 | { |
816 | struct event_constraint *c; | 799 | struct event_constraint *c; |
817 | 800 | ||
818 | c = intel_special_constraints(event); | 801 | c = intel_bts_constraints(event); |
802 | if (c) | ||
803 | return c; | ||
804 | |||
805 | c = intel_pebs_constraints(event); | ||
819 | if (c) | 806 | if (c) |
820 | return c; | 807 | return c; |
821 | 808 | ||
822 | return x86_get_event_constraints(cpuc, event); | 809 | return x86_get_event_constraints(cpuc, event); |
823 | } | 810 | } |
824 | 811 | ||
825 | static __initconst struct x86_pmu core_pmu = { | 812 | static int intel_pmu_hw_config(struct perf_event *event) |
813 | { | ||
814 | int ret = x86_pmu_hw_config(event); | ||
815 | |||
816 | if (ret) | ||
817 | return ret; | ||
818 | |||
819 | if (event->attr.type != PERF_TYPE_RAW) | ||
820 | return 0; | ||
821 | |||
822 | if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) | ||
823 | return 0; | ||
824 | |||
825 | if (x86_pmu.version < 3) | ||
826 | return -EINVAL; | ||
827 | |||
828 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
829 | return -EACCES; | ||
830 | |||
831 | event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; | ||
832 | |||
833 | return 0; | ||
834 | } | ||
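The ANY-bit check added in intel_pmu_hw_config() gates raw events that request AnyThread counting (bit 21 of the event-select register). A hedged user-space sketch of such a request through perf_event_open() is shown below; the 0x3c event code and the syscall plumbing are illustrative only.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_RAW;
	attr.config = 0x3c | (1ULL << 21);	/* event 0x3c plus the AnyThread bit */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");	/* -EINVAL or -EACCES per the checks above */
	else
		close(fd);
	return 0;
}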
835 | |||
836 | static __initconst const struct x86_pmu core_pmu = { | ||
826 | .name = "core", | 837 | .name = "core", |
827 | .handle_irq = x86_pmu_handle_irq, | 838 | .handle_irq = x86_pmu_handle_irq, |
828 | .disable_all = x86_pmu_disable_all, | 839 | .disable_all = x86_pmu_disable_all, |
829 | .enable_all = x86_pmu_enable_all, | 840 | .enable_all = x86_pmu_enable_all, |
830 | .enable = x86_pmu_enable_event, | 841 | .enable = x86_pmu_enable_event, |
831 | .disable = x86_pmu_disable_event, | 842 | .disable = x86_pmu_disable_event, |
843 | .hw_config = x86_pmu_hw_config, | ||
844 | .schedule_events = x86_schedule_events, | ||
832 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 845 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
833 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 846 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
834 | .event_map = intel_pmu_event_map, | 847 | .event_map = intel_pmu_event_map, |
835 | .raw_event = intel_pmu_raw_event, | ||
836 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 848 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
837 | .apic = 1, | 849 | .apic = 1, |
838 | /* | 850 | /* |
@@ -845,17 +857,32 @@ static __initconst struct x86_pmu core_pmu = { | |||
845 | .event_constraints = intel_core_event_constraints, | 857 | .event_constraints = intel_core_event_constraints, |
846 | }; | 858 | }; |
847 | 859 | ||
848 | static __initconst struct x86_pmu intel_pmu = { | 860 | static void intel_pmu_cpu_starting(int cpu) |
861 | { | ||
862 | init_debug_store_on_cpu(cpu); | ||
863 | /* | ||
864 | * Deal with CPUs that don't clear their LBRs on power-up. | ||
865 | */ | ||
866 | intel_pmu_lbr_reset(); | ||
867 | } | ||
868 | |||
869 | static void intel_pmu_cpu_dying(int cpu) | ||
870 | { | ||
871 | fini_debug_store_on_cpu(cpu); | ||
872 | } | ||
873 | |||
874 | static __initconst const struct x86_pmu intel_pmu = { | ||
849 | .name = "Intel", | 875 | .name = "Intel", |
850 | .handle_irq = intel_pmu_handle_irq, | 876 | .handle_irq = intel_pmu_handle_irq, |
851 | .disable_all = intel_pmu_disable_all, | 877 | .disable_all = intel_pmu_disable_all, |
852 | .enable_all = intel_pmu_enable_all, | 878 | .enable_all = intel_pmu_enable_all, |
853 | .enable = intel_pmu_enable_event, | 879 | .enable = intel_pmu_enable_event, |
854 | .disable = intel_pmu_disable_event, | 880 | .disable = intel_pmu_disable_event, |
881 | .hw_config = intel_pmu_hw_config, | ||
882 | .schedule_events = x86_schedule_events, | ||
855 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 883 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
856 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 884 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
857 | .event_map = intel_pmu_event_map, | 885 | .event_map = intel_pmu_event_map, |
858 | .raw_event = intel_pmu_raw_event, | ||
859 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 886 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
860 | .apic = 1, | 887 | .apic = 1, |
861 | /* | 888 | /* |
@@ -864,14 +891,38 @@ static __initconst struct x86_pmu intel_pmu = { | |||
864 | * the generic event period: | 891 | * the generic event period: |
865 | */ | 892 | */ |
866 | .max_period = (1ULL << 31) - 1, | 893 | .max_period = (1ULL << 31) - 1, |
867 | .enable_bts = intel_pmu_enable_bts, | ||
868 | .disable_bts = intel_pmu_disable_bts, | ||
869 | .get_event_constraints = intel_get_event_constraints, | 894 | .get_event_constraints = intel_get_event_constraints, |
870 | 895 | ||
871 | .cpu_starting = init_debug_store_on_cpu, | 896 | .cpu_starting = intel_pmu_cpu_starting, |
872 | .cpu_dying = fini_debug_store_on_cpu, | 897 | .cpu_dying = intel_pmu_cpu_dying, |
873 | }; | 898 | }; |
874 | 899 | ||
900 | static void intel_clovertown_quirks(void) | ||
901 | { | ||
902 | /* | ||
903 | * PEBS is unreliable due to: | ||
904 | * | ||
905 | * AJ67 - PEBS may experience CPL leaks | ||
906 | * AJ68 - PEBS PMI may be delayed by one event | ||
907 | * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] | ||
908 | * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS | ||
909 | * | ||
910 | * AJ67 could be worked around by restricting the OS/USR flags. | ||
911 | * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. | ||
912 | * | ||
913 | * AJ106 could possibly be worked around by not allowing LBR | ||
914 | * usage from PEBS, including the fixup. | ||
915 | * AJ68 could possibly be worked around by always programming | ||
916 | * a pebs_event_reset[0] value and coping with the lost events. | ||
917 | * | ||
918 | * But taken together it might just make sense to not enable PEBS on | ||
919 | * these chips. | ||
920 | */ | ||
921 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | ||
922 | x86_pmu.pebs = 0; | ||
923 | x86_pmu.pebs_constraints = NULL; | ||
924 | } | ||
925 | |||
875 | static __init int intel_pmu_init(void) | 926 | static __init int intel_pmu_init(void) |
876 | { | 927 | { |
877 | union cpuid10_edx edx; | 928 | union cpuid10_edx edx; |
@@ -881,12 +932,13 @@ static __init int intel_pmu_init(void) | |||
881 | int version; | 932 | int version; |
882 | 933 | ||
883 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 934 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
884 | /* check for P6 processor family */ | 935 | switch (boot_cpu_data.x86) { |
885 | if (boot_cpu_data.x86 == 6) { | 936 | case 0x6: |
886 | return p6_pmu_init(); | 937 | return p6_pmu_init(); |
887 | } else { | 938 | case 0xf: |
939 | return p4_pmu_init(); | ||
940 | } | ||
888 | return -ENODEV; | 941 | return -ENODEV; |
889 | } | ||
890 | } | 942 | } |
891 | 943 | ||
892 | /* | 944 | /* |
@@ -904,16 +956,28 @@ static __init int intel_pmu_init(void) | |||
904 | x86_pmu = intel_pmu; | 956 | x86_pmu = intel_pmu; |
905 | 957 | ||
906 | x86_pmu.version = version; | 958 | x86_pmu.version = version; |
907 | x86_pmu.num_events = eax.split.num_events; | 959 | x86_pmu.num_counters = eax.split.num_counters; |
908 | x86_pmu.event_bits = eax.split.bit_width; | 960 | x86_pmu.cntval_bits = eax.split.bit_width; |
909 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | 961 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; |
910 | 962 | ||
911 | /* | 963 | /* |
912 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 964 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
913 | * assume at least 3 events: | 965 | * assume at least 3 events: |
914 | */ | 966 | */ |
915 | if (version > 1) | 967 | if (version > 1) |
916 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | 968 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); |
969 | |||
970 | /* | ||
971 | * v2 and above have a perf capabilities MSR | ||
972 | */ | ||
973 | if (version > 1) { | ||
974 | u64 capabilities; | ||
975 | |||
976 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); | ||
977 | x86_pmu.intel_cap.capabilities = capabilities; | ||
978 | } | ||
979 | |||
980 | intel_ds_init(); | ||
917 | 981 | ||
918 | /* | 982 | /* |
919 | * Install the hw-cache-events table: | 983 | * Install the hw-cache-events table: |
@@ -924,12 +988,15 @@ static __init int intel_pmu_init(void) | |||
924 | break; | 988 | break; |
925 | 989 | ||
926 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 990 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ |
991 | x86_pmu.quirks = intel_clovertown_quirks; | ||
927 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 992 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ |
928 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 993 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ |
929 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 994 | case 29: /* six-core 45 nm xeon "Dunnington" */ |
930 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | 995 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, |
931 | sizeof(hw_cache_event_ids)); | 996 | sizeof(hw_cache_event_ids)); |
932 | 997 | ||
998 | intel_pmu_lbr_init_core(); | ||
999 | |||
933 | x86_pmu.event_constraints = intel_core2_event_constraints; | 1000 | x86_pmu.event_constraints = intel_core2_event_constraints; |
934 | pr_cont("Core2 events, "); | 1001 | pr_cont("Core2 events, "); |
935 | break; | 1002 | break; |
@@ -940,13 +1007,19 @@ static __init int intel_pmu_init(void) | |||
940 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 1007 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
941 | sizeof(hw_cache_event_ids)); | 1008 | sizeof(hw_cache_event_ids)); |
942 | 1009 | ||
1010 | intel_pmu_lbr_init_nhm(); | ||
1011 | |||
943 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 1012 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
944 | pr_cont("Nehalem/Corei7 events, "); | 1013 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1014 | pr_cont("Nehalem events, "); | ||
945 | break; | 1015 | break; |
1016 | |||
946 | case 28: /* Atom */ | 1017 | case 28: /* Atom */ |
947 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | 1018 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
948 | sizeof(hw_cache_event_ids)); | 1019 | sizeof(hw_cache_event_ids)); |
949 | 1020 | ||
1021 | intel_pmu_lbr_init_atom(); | ||
1022 | |||
950 | x86_pmu.event_constraints = intel_gen_event_constraints; | 1023 | x86_pmu.event_constraints = intel_gen_event_constraints; |
951 | pr_cont("Atom events, "); | 1024 | pr_cont("Atom events, "); |
952 | break; | 1025 | break; |
@@ -956,7 +1029,10 @@ static __init int intel_pmu_init(void) | |||
956 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | 1029 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
957 | sizeof(hw_cache_event_ids)); | 1030 | sizeof(hw_cache_event_ids)); |
958 | 1031 | ||
1032 | intel_pmu_lbr_init_nhm(); | ||
1033 | |||
959 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 1034 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
1035 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | ||
960 | pr_cont("Westmere events, "); | 1036 | pr_cont("Westmere events, "); |
961 | break; | 1037 | break; |
962 | 1038 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c new file mode 100644 index 000000000000..18018d1311cd --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -0,0 +1,641 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* The maximal number of PEBS events: */ | ||
4 | #define MAX_PEBS_EVENTS 4 | ||
5 | |||
6 | /* The size of a BTS record in bytes: */ | ||
7 | #define BTS_RECORD_SIZE 24 | ||
8 | |||
9 | #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) | ||
10 | #define PEBS_BUFFER_SIZE PAGE_SIZE | ||
11 | |||
12 | /* | ||
13 | * pebs_record_32 for p4 and core not supported | ||
14 | |||
15 | struct pebs_record_32 { | ||
16 | u32 flags, ip; | ||
17 | u32 ax, bx, cx, dx; | ||
18 | u32 si, di, bp, sp; | ||
19 | }; | ||
20 | |||
21 | */ | ||
22 | |||
23 | struct pebs_record_core { | ||
24 | u64 flags, ip; | ||
25 | u64 ax, bx, cx, dx; | ||
26 | u64 si, di, bp, sp; | ||
27 | u64 r8, r9, r10, r11; | ||
28 | u64 r12, r13, r14, r15; | ||
29 | }; | ||
30 | |||
31 | struct pebs_record_nhm { | ||
32 | u64 flags, ip; | ||
33 | u64 ax, bx, cx, dx; | ||
34 | u64 si, di, bp, sp; | ||
35 | u64 r8, r9, r10, r11; | ||
36 | u64 r12, r13, r14, r15; | ||
37 | u64 status, dla, dse, lat; | ||
38 | }; | ||
39 | |||
40 | /* | ||
41 | * A debug store configuration. | ||
42 | * | ||
43 | * We only support architectures that use 64bit fields. | ||
44 | */ | ||
45 | struct debug_store { | ||
46 | u64 bts_buffer_base; | ||
47 | u64 bts_index; | ||
48 | u64 bts_absolute_maximum; | ||
49 | u64 bts_interrupt_threshold; | ||
50 | u64 pebs_buffer_base; | ||
51 | u64 pebs_index; | ||
52 | u64 pebs_absolute_maximum; | ||
53 | u64 pebs_interrupt_threshold; | ||
54 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
55 | }; | ||
56 | |||
57 | static void init_debug_store_on_cpu(int cpu) | ||
58 | { | ||
59 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
60 | |||
61 | if (!ds) | ||
62 | return; | ||
63 | |||
64 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
65 | (u32)((u64)(unsigned long)ds), | ||
66 | (u32)((u64)(unsigned long)ds >> 32)); | ||
67 | } | ||
68 | |||
69 | static void fini_debug_store_on_cpu(int cpu) | ||
70 | { | ||
71 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
72 | return; | ||
73 | |||
74 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
75 | } | ||
76 | |||
77 | static void release_ds_buffers(void) | ||
78 | { | ||
79 | int cpu; | ||
80 | |||
81 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
82 | return; | ||
83 | |||
84 | get_online_cpus(); | ||
85 | |||
86 | for_each_online_cpu(cpu) | ||
87 | fini_debug_store_on_cpu(cpu); | ||
88 | |||
89 | for_each_possible_cpu(cpu) { | ||
90 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
91 | |||
92 | if (!ds) | ||
93 | continue; | ||
94 | |||
95 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
96 | |||
97 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
98 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
99 | kfree(ds); | ||
100 | } | ||
101 | |||
102 | put_online_cpus(); | ||
103 | } | ||
104 | |||
105 | static int reserve_ds_buffers(void) | ||
106 | { | ||
107 | int cpu, err = 0; | ||
108 | |||
109 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
110 | return 0; | ||
111 | |||
112 | get_online_cpus(); | ||
113 | |||
114 | for_each_possible_cpu(cpu) { | ||
115 | struct debug_store *ds; | ||
116 | void *buffer; | ||
117 | int max, thresh; | ||
118 | |||
119 | err = -ENOMEM; | ||
120 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
121 | if (unlikely(!ds)) | ||
122 | break; | ||
123 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
124 | |||
125 | if (x86_pmu.bts) { | ||
126 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
127 | if (unlikely(!buffer)) | ||
128 | break; | ||
129 | |||
130 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
131 | thresh = max / 16; | ||
132 | |||
133 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
134 | ds->bts_index = ds->bts_buffer_base; | ||
135 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
136 | max * BTS_RECORD_SIZE; | ||
137 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
138 | thresh * BTS_RECORD_SIZE; | ||
139 | } | ||
140 | |||
141 | if (x86_pmu.pebs) { | ||
142 | buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); | ||
143 | if (unlikely(!buffer)) | ||
144 | break; | ||
145 | |||
146 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
147 | |||
148 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
149 | ds->pebs_index = ds->pebs_buffer_base; | ||
150 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
151 | max * x86_pmu.pebs_record_size; | ||
152 | /* | ||
153 | * Always use single record PEBS | ||
154 | */ | ||
155 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
156 | x86_pmu.pebs_record_size; | ||
157 | } | ||
158 | |||
159 | err = 0; | ||
160 | } | ||
161 | |||
162 | if (err) | ||
163 | release_ds_buffers(); | ||
164 | else { | ||
165 | for_each_online_cpu(cpu) | ||
166 | init_debug_store_on_cpu(cpu); | ||
167 | } | ||
168 | |||
169 | put_online_cpus(); | ||
170 | |||
171 | return err; | ||
172 | } | ||
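
The sizing above is plain arithmetic: the BTS buffer holds BTS_BUFFER_SIZE / BTS_RECORD_SIZE records and interrupts when only max/16 records of room remain, while PEBS is programmed for single-record operation with the threshold one record past the base. A standalone sketch of those numbers, assuming a 4 KiB page and the 144-byte pebs_record_core layout defined earlier:

    #include <stdio.h>

    #define PAGE_SIZE        4096UL        /* assumed for illustration */
    #define BTS_BUFFER_SIZE  (PAGE_SIZE << 4)
    #define PEBS_BUFFER_SIZE PAGE_SIZE
    #define BTS_RECORD_SIZE  24UL
    #define PEBS_RECORD_CORE 144UL         /* sizeof(struct pebs_record_core) */

    int main(void)
    {
            unsigned long bts_max    = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
            unsigned long bts_thresh = bts_max / 16;

            printf("BTS: %lu records, interrupt %lu records before the end\n",
                   bts_max, bts_thresh);
            printf("PEBS: %lu core-format records, threshold after record 1\n",
                   PEBS_BUFFER_SIZE / PEBS_RECORD_CORE);
            return 0;
    }
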
173 | |||
174 | /* | ||
175 | * BTS | ||
176 | */ | ||
177 | |||
178 | static struct event_constraint bts_constraint = | ||
179 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
180 | |||
181 | static void intel_pmu_enable_bts(u64 config) | ||
182 | { | ||
183 | unsigned long debugctlmsr; | ||
184 | |||
185 | debugctlmsr = get_debugctlmsr(); | ||
186 | |||
187 | debugctlmsr |= DEBUGCTLMSR_TR; | ||
188 | debugctlmsr |= DEBUGCTLMSR_BTS; | ||
189 | debugctlmsr |= DEBUGCTLMSR_BTINT; | ||
190 | |||
191 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
192 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS; | ||
193 | |||
194 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
195 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR; | ||
196 | |||
197 | update_debugctlmsr(debugctlmsr); | ||
198 | } | ||
199 | |||
200 | static void intel_pmu_disable_bts(void) | ||
201 | { | ||
202 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
203 | unsigned long debugctlmsr; | ||
204 | |||
205 | if (!cpuc->ds) | ||
206 | return; | ||
207 | |||
208 | debugctlmsr = get_debugctlmsr(); | ||
209 | |||
210 | debugctlmsr &= | ||
211 | ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT | | ||
212 | DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR); | ||
213 | |||
214 | update_debugctlmsr(debugctlmsr); | ||
215 | } | ||
216 | |||
217 | static void intel_pmu_drain_bts_buffer(void) | ||
218 | { | ||
219 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
220 | struct debug_store *ds = cpuc->ds; | ||
221 | struct bts_record { | ||
222 | u64 from; | ||
223 | u64 to; | ||
224 | u64 flags; | ||
225 | }; | ||
226 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
227 | struct bts_record *at, *top; | ||
228 | struct perf_output_handle handle; | ||
229 | struct perf_event_header header; | ||
230 | struct perf_sample_data data; | ||
231 | struct pt_regs regs; | ||
232 | |||
233 | if (!event) | ||
234 | return; | ||
235 | |||
236 | if (!ds) | ||
237 | return; | ||
238 | |||
239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
240 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
241 | |||
242 | if (top <= at) | ||
243 | return; | ||
244 | |||
245 | ds->bts_index = ds->bts_buffer_base; | ||
246 | |||
247 | perf_sample_data_init(&data, 0); | ||
248 | data.period = event->hw.last_period; | ||
249 | regs.ip = 0; | ||
250 | |||
251 | /* | ||
252 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
253 | * We will overwrite the from and to address before we output | ||
254 | * the sample. | ||
255 | */ | ||
256 | perf_prepare_sample(&header, &data, event, ®s); | ||
257 | |||
258 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | ||
259 | return; | ||
260 | |||
261 | for (; at < top; at++) { | ||
262 | data.ip = at->from; | ||
263 | data.addr = at->to; | ||
264 | |||
265 | perf_output_sample(&handle, &header, &data, event); | ||
266 | } | ||
267 | |||
268 | perf_output_end(&handle); | ||
269 | |||
270 | /* There's new data available. */ | ||
271 | event->hw.interrupts++; | ||
272 | event->pending_kill = POLL_IN; | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * PEBS | ||
277 | */ | ||
278 | |||
279 | static struct event_constraint intel_core_pebs_events[] = { | ||
280 | PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ | ||
281 | PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ | ||
282 | PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | ||
283 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */ | ||
284 | PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
285 | PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
286 | PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
287 | PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
288 | PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
289 | EVENT_CONSTRAINT_END | ||
290 | }; | ||
291 | |||
292 | static struct event_constraint intel_nehalem_pebs_events[] = { | ||
293 | PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ | ||
294 | PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ | ||
295 | PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ | ||
296 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */ | ||
297 | PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
298 | PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
299 | PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
300 | PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
301 | PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
302 | EVENT_CONSTRAINT_END | ||
303 | }; | ||
304 | |||
305 | static struct event_constraint * | ||
306 | intel_pebs_constraints(struct perf_event *event) | ||
307 | { | ||
308 | struct event_constraint *c; | ||
309 | |||
310 | if (!event->attr.precise_ip) | ||
311 | return NULL; | ||
312 | |||
313 | if (x86_pmu.pebs_constraints) { | ||
314 | for_each_event_constraint(c, x86_pmu.pebs_constraints) { | ||
315 | if ((event->hw.config & c->cmask) == c->code) | ||
316 | return c; | ||
317 | } | ||
318 | } | ||
319 | |||
320 | return &emptyconstraint; | ||
321 | } | ||
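
The constraint lookup above is a straight mask-and-compare: an event's config matches a table entry when (config & cmask) == code, and a NULL result falls through to the empty constraint so non-PEBS counters are unaffected. A hedged sketch of that matching with a simplified 16-bit event+umask mask (the real PEBS_EVENT_CONSTRAINT macro builds code and cmask from the architectural event fields):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* simplified constraint: an event matches when (config & cmask) == code */
    struct constraint {
            uint64_t code;
            uint64_t cmask;
    };

    static const struct constraint pebs_tbl[] = {
            { 0x00c0, 0xffff },   /* INSTR_RETIRED.ANY, assumed 16-bit mask */
            { 0x01cb, 0xffff },   /* MEM_LOAD_RETIRED.L1D_MISS */
    };

    static const struct constraint *pebs_match(uint64_t config)
    {
            size_t i;

            for (i = 0; i < sizeof(pebs_tbl) / sizeof(pebs_tbl[0]); i++)
                    if ((config & pebs_tbl[i].cmask) == pebs_tbl[i].code)
                            return &pebs_tbl[i];
            return NULL;          /* caller falls back to the empty constraint */
    }

    int main(void)
    {
            printf("%d %d\n", pebs_match(0x5300c0ULL) != NULL,
                              pebs_match(0x0000c4ULL) != NULL);   /* prints: 1 0 */
            return 0;
    }
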
322 | |||
323 | static void intel_pmu_pebs_enable(struct perf_event *event) | ||
324 | { | ||
325 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
326 | struct hw_perf_event *hwc = &event->hw; | ||
327 | |||
328 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | ||
329 | |||
330 | cpuc->pebs_enabled |= 1ULL << hwc->idx; | ||
331 | WARN_ON_ONCE(cpuc->enabled); | ||
332 | |||
333 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
334 | intel_pmu_lbr_enable(event); | ||
335 | } | ||
336 | |||
337 | static void intel_pmu_pebs_disable(struct perf_event *event) | ||
338 | { | ||
339 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
340 | struct hw_perf_event *hwc = &event->hw; | ||
341 | |||
342 | cpuc->pebs_enabled &= ~(1ULL << hwc->idx); | ||
343 | if (cpuc->enabled) | ||
344 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
345 | |||
346 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; | ||
347 | |||
348 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
349 | intel_pmu_lbr_disable(event); | ||
350 | } | ||
351 | |||
352 | static void intel_pmu_pebs_enable_all(void) | ||
353 | { | ||
354 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
355 | |||
356 | if (cpuc->pebs_enabled) | ||
357 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
358 | } | ||
359 | |||
360 | static void intel_pmu_pebs_disable_all(void) | ||
361 | { | ||
362 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
363 | |||
364 | if (cpuc->pebs_enabled) | ||
365 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | ||
366 | } | ||
367 | |||
368 | #include <asm/insn.h> | ||
369 | |||
370 | static inline bool kernel_ip(unsigned long ip) | ||
371 | { | ||
372 | #ifdef CONFIG_X86_32 | ||
373 | return ip > PAGE_OFFSET; | ||
374 | #else | ||
375 | return (long)ip < 0; | ||
376 | #endif | ||
377 | } | ||
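
kernel_ip() exploits the address-space split: on x86_64 kernel text lives in the upper canonical half, so bit 63 of the address is set and the value is negative when viewed as a signed long; on 32-bit the split sits at PAGE_OFFSET. A small userspace sketch of the same checks, with the 32-bit PAGE_OFFSET assumed to be the common 3G/1G value:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_OFFSET_32 0xC0000000UL    /* assumed 3G/1G split; a config choice */

    static bool kernel_ip_64(uint64_t ip)
    {
            return (int64_t)ip < 0;        /* kernel addresses have bit 63 set */
    }

    static bool kernel_ip_32(uint32_t ip)
    {
            return ip > PAGE_OFFSET_32;
    }

    int main(void)
    {
            printf("%d %d\n", kernel_ip_64(0xffffffff81000000ULL),
                              kernel_ip_64(0x00007f0000001000ULL));  /* 1 0 */
            printf("%d %d\n", kernel_ip_32(0xc1000000U),
                              kernel_ip_32(0x08048000U));            /* 1 0 */
            return 0;
    }
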
378 | |||
379 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | ||
380 | { | ||
381 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
382 | unsigned long from = cpuc->lbr_entries[0].from; | ||
383 | unsigned long old_to, to = cpuc->lbr_entries[0].to; | ||
384 | unsigned long ip = regs->ip; | ||
385 | |||
386 | /* | ||
387 | * We don't need to fix up if the PEBS assist is fault-like | ||
388 | */ | ||
389 | if (!x86_pmu.intel_cap.pebs_trap) | ||
390 | return 1; | ||
391 | |||
392 | /* | ||
393 | * No LBR entry, no basic block, no rewinding | ||
394 | */ | ||
395 | if (!cpuc->lbr_stack.nr || !from || !to) | ||
396 | return 0; | ||
397 | |||
398 | /* | ||
399 | * Basic blocks should never cross user/kernel boundaries | ||
400 | */ | ||
401 | if (kernel_ip(ip) != kernel_ip(to)) | ||
402 | return 0; | ||
403 | |||
404 | /* | ||
405 | * unsigned math, either ip is before the start (impossible) or | ||
406 | * the basic block is larger than 1 page (sanity) | ||
407 | */ | ||
408 | if ((ip - to) > PAGE_SIZE) | ||
409 | return 0; | ||
410 | |||
411 | /* | ||
412 | * We sampled a branch insn, rewind using the LBR stack | ||
413 | */ | ||
414 | if (ip == to) { | ||
415 | regs->ip = from; | ||
416 | return 1; | ||
417 | } | ||
418 | |||
419 | do { | ||
420 | struct insn insn; | ||
421 | u8 buf[MAX_INSN_SIZE]; | ||
422 | void *kaddr; | ||
423 | |||
424 | old_to = to; | ||
425 | if (!kernel_ip(ip)) { | ||
426 | int bytes, size = MAX_INSN_SIZE; | ||
427 | |||
428 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); | ||
429 | if (bytes != size) | ||
430 | return 0; | ||
431 | |||
432 | kaddr = buf; | ||
433 | } else | ||
434 | kaddr = (void *)to; | ||
435 | |||
436 | kernel_insn_init(&insn, kaddr); | ||
437 | insn_get_length(&insn); | ||
438 | to += insn.length; | ||
439 | } while (to < ip); | ||
440 | |||
441 | if (to == ip) { | ||
442 | regs->ip = old_to; | ||
443 | return 1; | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | * Even though we decoded the basic block, the instruction stream | ||
448 | * never matched the given IP, either the TO or the IP got corrupted. | ||
449 | */ | ||
450 | return 0; | ||
451 | } | ||
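
The loop above compensates for the trap-like PEBS assist, which reports the address of the instruction after the one that triggered the event: starting from the last LBR branch target (the head of the basic block) it decodes forward one instruction at a time until it reaches the reported IP, and the previously decoded address is the instruction that actually retired. A minimal sketch of that walk, with a toy fixed-table decoder standing in for the kernel's <asm/insn.h> API:

    #include <stdint.h>
    #include <stdio.h>

    /* toy stand-in for the kernel insn decoder: each byte's low nibble,
     * clamped to a minimum of 1, is treated as the instruction length */
    static uint64_t toy_insn_len(const uint8_t *kaddr)
    {
            uint64_t len = *kaddr & 0x0f;
            return len ? len : 1;
    }

    /* walk from the basic-block start (last LBR branch target) towards the
     * PEBS-reported ip; the last address visited before reaching ip is the
     * instruction that actually caused the event */
    static uint64_t rewind_one_insn(const uint8_t *text, uint64_t to, uint64_t ip)
    {
            uint64_t old_to = to;

            while (to < ip) {
                    old_to = to;
                    to += toy_insn_len(text + to);
            }
            return (to == ip) ? old_to : 0;   /* 0: stream never matched ip */
    }

    int main(void)
    {
            /* "instructions" of length 3, 2, 4, 1 starting at offset 0 */
            uint8_t text[16] = { 0x03, 0, 0, 0x02, 0, 0x04, 0, 0, 0, 0x01 };

            printf("%llu\n",
                   (unsigned long long)rewind_one_insn(text, 0, 9)); /* prints 5 */
            return 0;
    }
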
452 | |||
453 | static int intel_pmu_save_and_restart(struct perf_event *event); | ||
454 | |||
455 | static void __intel_pmu_pebs_event(struct perf_event *event, | ||
456 | struct pt_regs *iregs, void *__pebs) | ||
457 | { | ||
458 | /* | ||
459 | * We cast to pebs_record_core since that is a subset of | ||
460 | * both formats and we don't use the other fields in this | ||
461 | * routine. | ||
462 | */ | ||
463 | struct pebs_record_core *pebs = __pebs; | ||
464 | struct perf_sample_data data; | ||
465 | struct pt_regs regs; | ||
466 | |||
467 | if (!intel_pmu_save_and_restart(event)) | ||
468 | return; | ||
469 | |||
470 | perf_sample_data_init(&data, 0); | ||
471 | data.period = event->hw.last_period; | ||
472 | |||
473 | /* | ||
474 | * We use the interrupt regs as a base because the PEBS record | ||
475 | * does not contain a full regs set, specifically it seems to | ||
476 | * lack segment descriptors, which get used by things like | ||
477 | * user_mode(). | ||
478 | * | ||
479 | * In the simple case fix up only the IP and BP,SP regs, for | ||
480 | * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. | ||
481 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. | ||
482 | */ | ||
483 | regs = *iregs; | ||
484 | regs.ip = pebs->ip; | ||
485 | regs.bp = pebs->bp; | ||
486 | regs.sp = pebs->sp; | ||
487 | |||
488 | if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) | ||
489 | regs.flags |= PERF_EFLAGS_EXACT; | ||
490 | else | ||
491 | regs.flags &= ~PERF_EFLAGS_EXACT; | ||
492 | |||
493 | if (perf_event_overflow(event, 1, &data, ®s)) | ||
494 | x86_pmu_stop(event); | ||
495 | } | ||
496 | |||
497 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | ||
498 | { | ||
499 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
500 | struct debug_store *ds = cpuc->ds; | ||
501 | struct perf_event *event = cpuc->events[0]; /* PMC0 only */ | ||
502 | struct pebs_record_core *at, *top; | ||
503 | int n; | ||
504 | |||
505 | if (!ds || !x86_pmu.pebs) | ||
506 | return; | ||
507 | |||
508 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; | ||
509 | top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; | ||
510 | |||
511 | /* | ||
512 | * Whatever else happens, drain the thing | ||
513 | */ | ||
514 | ds->pebs_index = ds->pebs_buffer_base; | ||
515 | |||
516 | if (!test_bit(0, cpuc->active_mask)) | ||
517 | return; | ||
518 | |||
519 | WARN_ON_ONCE(!event); | ||
520 | |||
521 | if (!event->attr.precise_ip) | ||
522 | return; | ||
523 | |||
524 | n = top - at; | ||
525 | if (n <= 0) | ||
526 | return; | ||
527 | |||
528 | /* | ||
529 | * Should not happen, we program the threshold at 1 and do not | ||
530 | * set a reset value. | ||
531 | */ | ||
532 | WARN_ON_ONCE(n > 1); | ||
533 | at += n - 1; | ||
534 | |||
535 | __intel_pmu_pebs_event(event, iregs, at); | ||
536 | } | ||
537 | |||
538 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | ||
539 | { | ||
540 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
541 | struct debug_store *ds = cpuc->ds; | ||
542 | struct pebs_record_nhm *at, *top; | ||
543 | struct perf_event *event = NULL; | ||
544 | u64 status = 0; | ||
545 | int bit, n; | ||
546 | |||
547 | if (!ds || !x86_pmu.pebs) | ||
548 | return; | ||
549 | |||
550 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
551 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
552 | |||
553 | ds->pebs_index = ds->pebs_buffer_base; | ||
554 | |||
555 | n = top - at; | ||
556 | if (n <= 0) | ||
557 | return; | ||
558 | |||
559 | /* | ||
560 | * Should not happen, we program the threshold at 1 and do not | ||
561 | * set a reset value. | ||
562 | */ | ||
563 | WARN_ON_ONCE(n > MAX_PEBS_EVENTS); | ||
564 | |||
565 | for ( ; at < top; at++) { | ||
566 | for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { | ||
567 | event = cpuc->events[bit]; | ||
568 | if (!test_bit(bit, cpuc->active_mask)) | ||
569 | continue; | ||
570 | |||
571 | WARN_ON_ONCE(!event); | ||
572 | |||
573 | if (!event->attr.precise_ip) | ||
574 | continue; | ||
575 | |||
576 | if (__test_and_set_bit(bit, (unsigned long *)&status)) | ||
577 | continue; | ||
578 | |||
579 | break; | ||
580 | } | ||
581 | |||
582 | if (!event || bit >= MAX_PEBS_EVENTS) | ||
583 | continue; | ||
584 | |||
585 | __intel_pmu_pebs_event(event, iregs, at); | ||
586 | } | ||
587 | } | ||
588 | |||
589 | /* | ||
590 | * BTS, PEBS probe and setup | ||
591 | */ | ||
592 | |||
593 | static void intel_ds_init(void) | ||
594 | { | ||
595 | /* | ||
596 | * No support for 32bit formats | ||
597 | */ | ||
598 | if (!boot_cpu_has(X86_FEATURE_DTES64)) | ||
599 | return; | ||
600 | |||
601 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); | ||
602 | x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); | ||
603 | if (x86_pmu.pebs) { | ||
604 | char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; | ||
605 | int format = x86_pmu.intel_cap.pebs_format; | ||
606 | |||
607 | switch (format) { | ||
608 | case 0: | ||
609 | printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); | ||
610 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); | ||
611 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; | ||
612 | x86_pmu.pebs_constraints = intel_core_pebs_events; | ||
613 | break; | ||
614 | |||
615 | case 1: | ||
616 | printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); | ||
617 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); | ||
618 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; | ||
619 | x86_pmu.pebs_constraints = intel_nehalem_pebs_events; | ||
620 | break; | ||
621 | |||
622 | default: | ||
623 | printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); | ||
624 | x86_pmu.pebs = 0; | ||
625 | break; | ||
626 | } | ||
627 | } | ||
628 | } | ||
629 | |||
630 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
631 | |||
632 | static int reserve_ds_buffers(void) | ||
633 | { | ||
634 | return 0; | ||
635 | } | ||
636 | |||
637 | static void release_ds_buffers(void) | ||
638 | { | ||
639 | } | ||
640 | |||
641 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c new file mode 100644 index 000000000000..d202c1bece1a --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -0,0 +1,218 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | enum { | ||
4 | LBR_FORMAT_32 = 0x00, | ||
5 | LBR_FORMAT_LIP = 0x01, | ||
6 | LBR_FORMAT_EIP = 0x02, | ||
7 | LBR_FORMAT_EIP_FLAGS = 0x03, | ||
8 | }; | ||
9 | |||
10 | /* | ||
11 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI, | ||
12 | * otherwise it becomes nearly impossible to get a reliable stack. | ||
13 | */ | ||
14 | |||
15 | static void __intel_pmu_lbr_enable(void) | ||
16 | { | ||
17 | u64 debugctl; | ||
18 | |||
19 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
20 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
21 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
22 | } | ||
23 | |||
24 | static void __intel_pmu_lbr_disable(void) | ||
25 | { | ||
26 | u64 debugctl; | ||
27 | |||
28 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
29 | debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
30 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
31 | } | ||
32 | |||
33 | static void intel_pmu_lbr_reset_32(void) | ||
34 | { | ||
35 | int i; | ||
36 | |||
37 | for (i = 0; i < x86_pmu.lbr_nr; i++) | ||
38 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
39 | } | ||
40 | |||
41 | static void intel_pmu_lbr_reset_64(void) | ||
42 | { | ||
43 | int i; | ||
44 | |||
45 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
46 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
47 | wrmsrl(x86_pmu.lbr_to + i, 0); | ||
48 | } | ||
49 | } | ||
50 | |||
51 | static void intel_pmu_lbr_reset(void) | ||
52 | { | ||
53 | if (!x86_pmu.lbr_nr) | ||
54 | return; | ||
55 | |||
56 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
57 | intel_pmu_lbr_reset_32(); | ||
58 | else | ||
59 | intel_pmu_lbr_reset_64(); | ||
60 | } | ||
61 | |||
62 | static void intel_pmu_lbr_enable(struct perf_event *event) | ||
63 | { | ||
64 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
65 | |||
66 | if (!x86_pmu.lbr_nr) | ||
67 | return; | ||
68 | |||
69 | WARN_ON_ONCE(cpuc->enabled); | ||
70 | |||
71 | /* | ||
72 | * Reset the LBR stack if we changed task context to | ||
73 | * avoid data leaks. | ||
74 | */ | ||
75 | |||
76 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { | ||
77 | intel_pmu_lbr_reset(); | ||
78 | cpuc->lbr_context = event->ctx; | ||
79 | } | ||
80 | |||
81 | cpuc->lbr_users++; | ||
82 | } | ||
83 | |||
84 | static void intel_pmu_lbr_disable(struct perf_event *event) | ||
85 | { | ||
86 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
87 | |||
88 | if (!x86_pmu.lbr_nr) | ||
89 | return; | ||
90 | |||
91 | cpuc->lbr_users--; | ||
92 | WARN_ON_ONCE(cpuc->lbr_users < 0); | ||
93 | |||
94 | if (cpuc->enabled && !cpuc->lbr_users) | ||
95 | __intel_pmu_lbr_disable(); | ||
96 | } | ||
97 | |||
98 | static void intel_pmu_lbr_enable_all(void) | ||
99 | { | ||
100 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
101 | |||
102 | if (cpuc->lbr_users) | ||
103 | __intel_pmu_lbr_enable(); | ||
104 | } | ||
105 | |||
106 | static void intel_pmu_lbr_disable_all(void) | ||
107 | { | ||
108 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
109 | |||
110 | if (cpuc->lbr_users) | ||
111 | __intel_pmu_lbr_disable(); | ||
112 | } | ||
113 | |||
114 | static inline u64 intel_pmu_lbr_tos(void) | ||
115 | { | ||
116 | u64 tos; | ||
117 | |||
118 | rdmsrl(x86_pmu.lbr_tos, tos); | ||
119 | |||
120 | return tos; | ||
121 | } | ||
122 | |||
123 | static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | ||
124 | { | ||
125 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
126 | u64 tos = intel_pmu_lbr_tos(); | ||
127 | int i; | ||
128 | |||
129 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
130 | unsigned long lbr_idx = (tos - i) & mask; | ||
131 | union { | ||
132 | struct { | ||
133 | u32 from; | ||
134 | u32 to; | ||
135 | }; | ||
136 | u64 lbr; | ||
137 | } msr_lastbranch; | ||
138 | |||
139 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); | ||
140 | |||
141 | cpuc->lbr_entries[i].from = msr_lastbranch.from; | ||
142 | cpuc->lbr_entries[i].to = msr_lastbranch.to; | ||
143 | cpuc->lbr_entries[i].flags = 0; | ||
144 | } | ||
145 | cpuc->lbr_stack.nr = i; | ||
146 | } | ||
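
The (tos - i) & mask indexing works because lbr_nr is a power of two: TOS names the most recent entry and the subtraction walks backwards through the MSR ring with natural wraparound. A tiny sketch with the 4-entry Core stack and an assumed TOS of 1:

    #include <stdio.h>

    int main(void)
    {
            const unsigned long lbr_nr = 4;        /* Core: 4 LBR entries */
            const unsigned long mask   = lbr_nr - 1;
            const unsigned long tos    = 1;        /* assumed top-of-stack */
            unsigned long i;

            /* most recent branch first, wrapping around the ring */
            for (i = 0; i < lbr_nr; i++)
                    printf("entry %lu -> MSR index %lu\n", i, (tos - i) & mask);
            /* prints MSR indices 1, 0, 3, 2 */
            return 0;
    }
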
147 | |||
148 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
149 | |||
150 | /* | ||
151 | * Due to lack of segmentation in Linux the effective address (offset) | ||
152 | * is the same as the linear address, allowing us to merge the LIP and EIP | ||
153 | * LBR formats. | ||
154 | */ | ||
155 | static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | ||
156 | { | ||
157 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
158 | int lbr_format = x86_pmu.intel_cap.lbr_format; | ||
159 | u64 tos = intel_pmu_lbr_tos(); | ||
160 | int i; | ||
161 | |||
162 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
163 | unsigned long lbr_idx = (tos - i) & mask; | ||
164 | u64 from, to, flags = 0; | ||
165 | |||
166 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | ||
167 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); | ||
168 | |||
169 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { | ||
170 | flags = !!(from & LBR_FROM_FLAG_MISPRED); | ||
171 | from = (u64)((((s64)from) << 1) >> 1); | ||
172 | } | ||
173 | |||
174 | cpuc->lbr_entries[i].from = from; | ||
175 | cpuc->lbr_entries[i].to = to; | ||
176 | cpuc->lbr_entries[i].flags = flags; | ||
177 | } | ||
178 | cpuc->lbr_stack.nr = i; | ||
179 | } | ||
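
In the EIP_FLAGS format the hardware reuses bit 63 of the FROM address for the misprediction flag; the shift pair in the code sign-extends from bit 62 again, which both strips the flag and restores a canonical address. A small sketch of the same extraction on a made-up user-space FROM value (note it relies on arithmetic right shift of a signed value, which mainstream compilers implement as expected):

    #include <inttypes.h>
    #include <stdio.h>

    #define LBR_FROM_FLAG_MISPRED (1ULL << 63)

    int main(void)
    {
            /* made-up user-space branch source with the mispredict flag set */
            uint64_t from = 0x00007f0012345678ULL | LBR_FROM_FLAG_MISPRED;

            int mispred = !!(from & LBR_FROM_FLAG_MISPRED);
            /* shift the flag bit out, then sign-extend from bit 62 */
            uint64_t addr = (uint64_t)((int64_t)(from << 1) >> 1);

            printf("mispred=%d addr=%#" PRIx64 "\n", mispred, addr);
            /* mispred=1 addr=0x7f0012345678 */
            return 0;
    }
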
180 | |||
181 | static void intel_pmu_lbr_read(void) | ||
182 | { | ||
183 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
184 | |||
185 | if (!cpuc->lbr_users) | ||
186 | return; | ||
187 | |||
188 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
189 | intel_pmu_lbr_read_32(cpuc); | ||
190 | else | ||
191 | intel_pmu_lbr_read_64(cpuc); | ||
192 | } | ||
193 | |||
194 | static void intel_pmu_lbr_init_core(void) | ||
195 | { | ||
196 | x86_pmu.lbr_nr = 4; | ||
197 | x86_pmu.lbr_tos = 0x01c9; | ||
198 | x86_pmu.lbr_from = 0x40; | ||
199 | x86_pmu.lbr_to = 0x60; | ||
200 | } | ||
201 | |||
202 | static void intel_pmu_lbr_init_nhm(void) | ||
203 | { | ||
204 | x86_pmu.lbr_nr = 16; | ||
205 | x86_pmu.lbr_tos = 0x01c9; | ||
206 | x86_pmu.lbr_from = 0x680; | ||
207 | x86_pmu.lbr_to = 0x6c0; | ||
208 | } | ||
209 | |||
210 | static void intel_pmu_lbr_init_atom(void) | ||
211 | { | ||
212 | x86_pmu.lbr_nr = 8; | ||
213 | x86_pmu.lbr_tos = 0x01c9; | ||
214 | x86_pmu.lbr_from = 0x40; | ||
215 | x86_pmu.lbr_to = 0x60; | ||
216 | } | ||
217 | |||
218 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c new file mode 100644 index 000000000000..249015173992 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -0,0 +1,951 @@ | |||
1 | /* | ||
2 | * Netburst Performance Events (P4, old Xeon) | ||
3 | * | ||
4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> | ||
5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> | ||
6 | * | ||
7 | * For licencing details see kernel-base/COPYING | ||
8 | */ | ||
9 | |||
10 | #ifdef CONFIG_CPU_SUP_INTEL | ||
11 | |||
12 | #include <asm/perf_event_p4.h> | ||
13 | |||
14 | #define P4_CNTR_LIMIT 3 | ||
15 | /* | ||
16 | * array indices: 0,1 - HT threads, used on an HT-enabled cpu | ||
17 | */ | ||
18 | struct p4_event_bind { | ||
19 | unsigned int opcode; /* Event code and ESCR selector */ | ||
20 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | ||
21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ | ||
22 | }; | ||
23 | |||
24 | struct p4_pebs_bind { | ||
25 | unsigned int metric_pebs; | ||
26 | unsigned int metric_vert; | ||
27 | }; | ||
28 | |||
29 | /* it sets P4_PEBS_ENABLE_UOP_TAG as well */ | ||
30 | #define P4_GEN_PEBS_BIND(name, pebs, vert) \ | ||
31 | [P4_PEBS_METRIC__##name] = { \ | ||
32 | .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \ | ||
33 | .metric_vert = vert, \ | ||
34 | } | ||
35 | |||
36 | /* | ||
37 | * note we have P4_PEBS_ENABLE_UOP_TAG always set here | ||
38 | * | ||
39 | * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of | ||
40 | * event configuration to find out which values are to be | ||
41 | * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT | ||
42 | * registers | ||
43 | */ | ||
44 | static struct p4_pebs_bind p4_pebs_bind_map[] = { | ||
45 | P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001), | ||
46 | P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001), | ||
47 | P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001), | ||
48 | P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002), | ||
49 | P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003), | ||
50 | P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010), | ||
51 | P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001), | ||
52 | P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001), | ||
53 | P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002), | ||
54 | }; | ||
55 | |||
56 | /* | ||
57 | * Note that we don't use CCCR1 here; there is an | ||
58 | * exception for P4_BSQ_ALLOCATION but we simply have | ||
59 | * no workaround for it | ||
60 | * | ||
61 | * consider this binding as the resources which a particular | ||
62 | * event may borrow; it doesn't contain the EventMask, | ||
63 | * Tags and friends -- they are left to the caller | ||
64 | */ | ||
65 | static struct p4_event_bind p4_event_bind_map[] = { | ||
66 | [P4_EVENT_TC_DELIVER_MODE] = { | ||
67 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), | ||
68 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
69 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
70 | }, | ||
71 | [P4_EVENT_BPU_FETCH_REQUEST] = { | ||
72 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), | ||
73 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, | ||
74 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
75 | }, | ||
76 | [P4_EVENT_ITLB_REFERENCE] = { | ||
77 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), | ||
78 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | ||
79 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
80 | }, | ||
81 | [P4_EVENT_MEMORY_CANCEL] = { | ||
82 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), | ||
83 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
84 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
85 | }, | ||
86 | [P4_EVENT_MEMORY_COMPLETE] = { | ||
87 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), | ||
88 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | ||
89 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
90 | }, | ||
91 | [P4_EVENT_LOAD_PORT_REPLAY] = { | ||
92 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), | ||
93 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | ||
94 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
95 | }, | ||
96 | [P4_EVENT_STORE_PORT_REPLAY] = { | ||
97 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), | ||
98 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | ||
99 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
100 | }, | ||
101 | [P4_EVENT_MOB_LOAD_REPLAY] = { | ||
102 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), | ||
103 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, | ||
104 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
105 | }, | ||
106 | [P4_EVENT_PAGE_WALK_TYPE] = { | ||
107 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), | ||
108 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, | ||
109 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
110 | }, | ||
111 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { | ||
112 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), | ||
113 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | ||
114 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
115 | }, | ||
116 | [P4_EVENT_IOQ_ALLOCATION] = { | ||
117 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), | ||
118 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
119 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
120 | }, | ||
121 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
122 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), | ||
123 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, | ||
124 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
125 | }, | ||
126 | [P4_EVENT_FSB_DATA_ACTIVITY] = { | ||
127 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), | ||
128 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
129 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
130 | }, | ||
131 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ | ||
132 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), | ||
133 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, | ||
134 | .cntr = { {0, -1, -1}, {1, -1, -1} }, | ||
135 | }, | ||
136 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
137 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), | ||
138 | .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, | ||
139 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
140 | }, | ||
141 | [P4_EVENT_SSE_INPUT_ASSIST] = { | ||
142 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), | ||
143 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
144 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
145 | }, | ||
146 | [P4_EVENT_PACKED_SP_UOP] = { | ||
147 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), | ||
148 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
149 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
150 | }, | ||
151 | [P4_EVENT_PACKED_DP_UOP] = { | ||
152 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), | ||
153 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
154 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
155 | }, | ||
156 | [P4_EVENT_SCALAR_SP_UOP] = { | ||
157 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), | ||
158 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
159 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
160 | }, | ||
161 | [P4_EVENT_SCALAR_DP_UOP] = { | ||
162 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), | ||
163 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
164 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
165 | }, | ||
166 | [P4_EVENT_64BIT_MMX_UOP] = { | ||
167 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), | ||
168 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
169 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
170 | }, | ||
171 | [P4_EVENT_128BIT_MMX_UOP] = { | ||
172 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), | ||
173 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
174 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
175 | }, | ||
176 | [P4_EVENT_X87_FP_UOP] = { | ||
177 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), | ||
178 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
179 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
180 | }, | ||
181 | [P4_EVENT_TC_MISC] = { | ||
182 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), | ||
183 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
184 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
185 | }, | ||
186 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { | ||
187 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), | ||
188 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
189 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
190 | }, | ||
191 | [P4_EVENT_TC_MS_XFER] = { | ||
192 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), | ||
193 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
194 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
195 | }, | ||
196 | [P4_EVENT_UOP_QUEUE_WRITES] = { | ||
197 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), | ||
198 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
199 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
200 | }, | ||
201 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { | ||
202 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), | ||
203 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, | ||
204 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
205 | }, | ||
206 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { | ||
207 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), | ||
208 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, | ||
209 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
210 | }, | ||
211 | [P4_EVENT_RESOURCE_STALL] = { | ||
212 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), | ||
213 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, | ||
214 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
215 | }, | ||
216 | [P4_EVENT_WC_BUFFER] = { | ||
217 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), | ||
218 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
219 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
220 | }, | ||
221 | [P4_EVENT_B2B_CYCLES] = { | ||
222 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), | ||
223 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
224 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
225 | }, | ||
226 | [P4_EVENT_BNR] = { | ||
227 | .opcode = P4_OPCODE(P4_EVENT_BNR), | ||
228 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
229 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
230 | }, | ||
231 | [P4_EVENT_SNOOP] = { | ||
232 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), | ||
233 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
234 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
235 | }, | ||
236 | [P4_EVENT_RESPONSE] = { | ||
237 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), | ||
238 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
239 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
240 | }, | ||
241 | [P4_EVENT_FRONT_END_EVENT] = { | ||
242 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), | ||
243 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
244 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
245 | }, | ||
246 | [P4_EVENT_EXECUTION_EVENT] = { | ||
247 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), | ||
248 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
249 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
250 | }, | ||
251 | [P4_EVENT_REPLAY_EVENT] = { | ||
252 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), | ||
253 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
254 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
255 | }, | ||
256 | [P4_EVENT_INSTR_RETIRED] = { | ||
257 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), | ||
258 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
259 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
260 | }, | ||
261 | [P4_EVENT_UOPS_RETIRED] = { | ||
262 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), | ||
263 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
264 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
265 | }, | ||
266 | [P4_EVENT_UOP_TYPE] = { | ||
267 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), | ||
268 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | ||
269 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
270 | }, | ||
271 | [P4_EVENT_BRANCH_RETIRED] = { | ||
272 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), | ||
273 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
274 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
275 | }, | ||
276 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { | ||
277 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), | ||
278 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
279 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
280 | }, | ||
281 | [P4_EVENT_X87_ASSIST] = { | ||
282 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), | ||
283 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
284 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
285 | }, | ||
286 | [P4_EVENT_MACHINE_CLEAR] = { | ||
287 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), | ||
288 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
289 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
290 | }, | ||
291 | [P4_EVENT_INSTR_COMPLETED] = { | ||
292 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), | ||
293 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
294 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
295 | }, | ||
296 | }; | ||
297 | |||
298 | #define P4_GEN_CACHE_EVENT(event, bit, metric) \ | ||
299 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ | ||
300 | P4_ESCR_EMASK_BIT(event, bit)) | \ | ||
301 | p4_config_pack_cccr(metric | \ | ||
302 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) | ||
303 | |||
304 | static __initconst const u64 p4_hw_cache_event_ids | ||
305 | [PERF_COUNT_HW_CACHE_MAX] | ||
306 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
307 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
308 | { | ||
309 | [ C(L1D ) ] = { | ||
310 | [ C(OP_READ) ] = { | ||
311 | [ C(RESULT_ACCESS) ] = 0x0, | ||
312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
313 | P4_PEBS_METRIC__1stl_cache_load_miss_retired), | ||
314 | }, | ||
315 | }, | ||
316 | [ C(LL ) ] = { | ||
317 | [ C(OP_READ) ] = { | ||
318 | [ C(RESULT_ACCESS) ] = 0x0, | ||
319 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
320 | P4_PEBS_METRIC__2ndl_cache_load_miss_retired), | ||
321 | }, | ||
322 | }, | ||
323 | [ C(DTLB) ] = { | ||
324 | [ C(OP_READ) ] = { | ||
325 | [ C(RESULT_ACCESS) ] = 0x0, | ||
326 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
327 | P4_PEBS_METRIC__dtlb_load_miss_retired), | ||
328 | }, | ||
329 | [ C(OP_WRITE) ] = { | ||
330 | [ C(RESULT_ACCESS) ] = 0x0, | ||
331 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
332 | P4_PEBS_METRIC__dtlb_store_miss_retired), | ||
333 | }, | ||
334 | }, | ||
335 | [ C(ITLB) ] = { | ||
336 | [ C(OP_READ) ] = { | ||
337 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, | ||
338 | P4_PEBS_METRIC__none), | ||
339 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, | ||
340 | P4_PEBS_METRIC__none), | ||
341 | }, | ||
342 | [ C(OP_WRITE) ] = { | ||
343 | [ C(RESULT_ACCESS) ] = -1, | ||
344 | [ C(RESULT_MISS) ] = -1, | ||
345 | }, | ||
346 | [ C(OP_PREFETCH) ] = { | ||
347 | [ C(RESULT_ACCESS) ] = -1, | ||
348 | [ C(RESULT_MISS) ] = -1, | ||
349 | }, | ||
350 | }, | ||
351 | }; | ||
352 | |||
353 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { | ||
354 | /* non-halted CPU clocks */ | ||
355 | [PERF_COUNT_HW_CPU_CYCLES] = | ||
356 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | | ||
357 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), | ||
358 | |||
359 | /* | ||
360 | * retired instructions | ||
361 | * for the sake of simplicity we don't use the FSB tagging | ||
362 | */ | ||
363 | [PERF_COUNT_HW_INSTRUCTIONS] = | ||
364 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) | | ||
365 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | | ||
366 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)), | ||
367 | |||
368 | /* cache hits */ | ||
369 | [PERF_COUNT_HW_CACHE_REFERENCES] = | ||
370 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
371 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | ||
372 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | ||
373 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | ||
374 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | ||
375 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | ||
376 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)), | ||
377 | |||
378 | /* cache misses */ | ||
379 | [PERF_COUNT_HW_CACHE_MISSES] = | ||
380 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
381 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | ||
382 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | ||
383 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)), | ||
384 | |||
385 | /* branch instructions retired */ | ||
386 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = | ||
387 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) | | ||
388 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | | ||
389 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | | ||
390 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | | ||
391 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)), | ||
392 | |||
393 | /* mispredicted branches retired */ | ||
394 | [PERF_COUNT_HW_BRANCH_MISSES] = | ||
395 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) | | ||
396 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)), | ||
397 | |||
398 | /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */ | ||
399 | [PERF_COUNT_HW_BUS_CYCLES] = | ||
400 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) | | ||
401 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | | ||
402 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) | | ||
403 | p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), | ||
404 | }; | ||
405 | |||
406 | static struct p4_event_bind *p4_config_get_bind(u64 config) | ||
407 | { | ||
408 | unsigned int evnt = p4_config_unpack_event(config); | ||
409 | struct p4_event_bind *bind = NULL; | ||
410 | |||
411 | if (evnt < ARRAY_SIZE(p4_event_bind_map)) | ||
412 | bind = &p4_event_bind_map[evnt]; | ||
413 | |||
414 | return bind; | ||
415 | } | ||
416 | |||
417 | static u64 p4_pmu_event_map(int hw_event) | ||
418 | { | ||
419 | struct p4_event_bind *bind; | ||
420 | unsigned int esel; | ||
421 | u64 config; | ||
422 | |||
423 | config = p4_general_events[hw_event]; | ||
424 | bind = p4_config_get_bind(config); | ||
425 | esel = P4_OPCODE_ESEL(bind->opcode); | ||
426 | config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); | ||
427 | |||
428 | return config; | ||
429 | } | ||
430 | |||
431 | static int p4_validate_raw_event(struct perf_event *event) | ||
432 | { | ||
433 | unsigned int v; | ||
434 | |||
435 | /* user data may have an out-of-bounds event index */ | ||
436 | v = p4_config_unpack_event(event->attr.config); | ||
437 | if (v >= ARRAY_SIZE(p4_event_bind_map)) { | ||
438 | pr_warning("P4 PMU: Unknown event code: %d\n", v); | ||
439 | return -EINVAL; | ||
440 | } | ||
441 | |||
442 | /* | ||
443 | * it may have some bogus PEBS bits | ||
444 | */ | ||
445 | if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { | ||
446 | pr_warning("P4 PMU: PEBS are not supported yet\n"); | ||
447 | return -EINVAL; | ||
448 | } | ||
449 | v = p4_config_unpack_metric(event->attr.config); | ||
450 | if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { | ||
451 | pr_warning("P4 PMU: Unknown metric code: %d\n", v); | ||
452 | return -EINVAL; | ||
453 | } | ||
454 | |||
455 | return 0; | ||
456 | } | ||
457 | |||
458 | static int p4_hw_config(struct perf_event *event) | ||
459 | { | ||
460 | int cpu = get_cpu(); | ||
461 | int rc = 0; | ||
462 | u32 escr, cccr; | ||
463 | |||
464 | /* | ||
465 | * the reason we take the cpu this early is that if we get scheduled | ||
466 | * for the first time on the same cpu -- we will not need to swap the | ||
467 | * thread-specific flags in the config (and will save some cpu cycles) | ||
468 | */ | ||
469 | |||
470 | cccr = p4_default_cccr_conf(cpu); | ||
471 | escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel, | ||
472 | event->attr.exclude_user); | ||
473 | event->hw.config = p4_config_pack_escr(escr) | | ||
474 | p4_config_pack_cccr(cccr); | ||
475 | |||
476 | if (p4_ht_active() && p4_ht_thread(cpu)) | ||
477 | event->hw.config = p4_set_ht_bit(event->hw.config); | ||
478 | |||
479 | if (event->attr.type == PERF_TYPE_RAW) { | ||
480 | |||
481 | rc = p4_validate_raw_event(event); | ||
482 | if (rc) | ||
483 | goto out; | ||
484 | |||
485 | /* | ||
486 | * We don't control raw events so it's up to the caller | ||
487 | * to pass sane values (and we don't count the thread number | ||
488 | * on an HT machine but allow HT-compatible specifics to be | ||
489 | * passed on) | ||
490 | * | ||
491 | * Note that for RAW events we allow user to use P4_CCCR_RESERVED | ||
492 | * bits since we keep additional info here (for cache events, etc.) | ||
493 | * | ||
494 | * XXX: HT wide things should check perf_paranoid_cpu() && | ||
495 | * CAP_SYS_ADMIN | ||
496 | */ | ||
497 | event->hw.config |= event->attr.config & | ||
498 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | ||
499 | p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); | ||
500 | |||
501 | event->hw.config &= ~P4_CCCR_FORCE_OVF; | ||
502 | } | ||
503 | |||
504 | rc = x86_setup_perfctr(event); | ||
505 | out: | ||
506 | put_cpu(); | ||
507 | return rc; | ||
508 | } | ||
509 | |||
510 | static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | ||
511 | { | ||
512 | int overflow = 0; | ||
513 | u32 low, high; | ||
514 | |||
515 | rdmsr(hwc->config_base + hwc->idx, low, high); | ||
516 | |||
517 | /* we need to check high bit for unflagged overflows */ | ||
518 | if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { | ||
519 | overflow = 1; | ||
520 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
521 | ((u64)low) & ~P4_CCCR_OVF); | ||
522 | } | ||
523 | |||
524 | return overflow; | ||
525 | } | ||
526 | |||
527 | static void p4_pmu_disable_pebs(void) | ||
528 | { | ||
529 | /* | ||
530 | * FIXME | ||
531 | * | ||
532 | * It's still allowed that two threads set up the same cache | ||
533 | * events, so we can't simply clear the metrics until we know | ||
534 | * no one is depending on us; we would need some kind of counter | ||
535 | * for "ReplayEvent" users. | ||
536 | * | ||
537 | * What is more complex -- RAW events: if the user (for some | ||
538 | * reason) passes some cache event metric with an improper | ||
539 | * event opcode, it's fine from the hardware point of view | ||
540 | * but complete nonsense as far as the meaning of such an action goes. | ||
541 | * | ||
542 | * So for the moment let's leave the metrics turned on forever -- it's | ||
543 | * ok for now but it needs to be revisited! | ||
544 | * | ||
545 | * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); | ||
546 | * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); | ||
547 | */ | ||
548 | } | ||
549 | |||
550 | static inline void p4_pmu_disable_event(struct perf_event *event) | ||
551 | { | ||
552 | struct hw_perf_event *hwc = &event->hw; | ||
553 | |||
554 | /* | ||
555 | * If the event gets disabled while the counter is in the overflowed | ||
556 | * state we need to clear P4_CCCR_OVF, otherwise the interrupt gets | ||
557 | * asserted again and again | ||
558 | */ | ||
559 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
560 | (u64)(p4_config_unpack_cccr(hwc->config)) & | ||
561 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); | ||
562 | } | ||
563 | |||
564 | static void p4_pmu_disable_all(void) | ||
565 | { | ||
566 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
567 | int idx; | ||
568 | |||
569 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
570 | struct perf_event *event = cpuc->events[idx]; | ||
571 | if (!test_bit(idx, cpuc->active_mask)) | ||
572 | continue; | ||
573 | p4_pmu_disable_event(event); | ||
574 | } | ||
575 | |||
576 | p4_pmu_disable_pebs(); | ||
577 | } | ||
578 | |||
579 | /* configuration must be valid */ | ||
580 | static void p4_pmu_enable_pebs(u64 config) | ||
581 | { | ||
582 | struct p4_pebs_bind *bind; | ||
583 | unsigned int idx; | ||
584 | |||
585 | BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK); | ||
586 | |||
587 | idx = p4_config_unpack_metric(config); | ||
588 | if (idx == P4_PEBS_METRIC__none) | ||
589 | return; | ||
590 | |||
591 | bind = &p4_pebs_bind_map[idx]; | ||
592 | |||
593 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); | ||
594 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); | ||
595 | } | ||
596 | |||
597 | static void p4_pmu_enable_event(struct perf_event *event) | ||
598 | { | ||
599 | struct hw_perf_event *hwc = &event->hw; | ||
600 | int thread = p4_ht_config_thread(hwc->config); | ||
601 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | ||
602 | unsigned int idx = p4_config_unpack_event(hwc->config); | ||
603 | struct p4_event_bind *bind; | ||
604 | u64 escr_addr, cccr; | ||
605 | |||
606 | bind = &p4_event_bind_map[idx]; | ||
607 | escr_addr = (u64)bind->escr_msr[thread]; | ||
608 | |||
609 | /* | ||
610 | * - we don't support cascaded counters yet | ||
611 | * - and counter 1 is broken (erratum) | ||
612 | */ | ||
613 | WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); | ||
614 | WARN_ON_ONCE(hwc->idx == 1); | ||
615 | |||
616 | /* we need a real Event value */ | ||
617 | escr_conf &= ~P4_ESCR_EVENT_MASK; | ||
618 | escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode)); | ||
619 | |||
620 | cccr = p4_config_unpack_cccr(hwc->config); | ||
621 | |||
622 | /* | ||
623 | * it could be a cache event, so we need to write the metrics | ||
624 | * into additional MSRs | ||
625 | */ | ||
626 | p4_pmu_enable_pebs(hwc->config); | ||
627 | |||
628 | (void)checking_wrmsrl(escr_addr, escr_conf); | ||
629 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
630 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); | ||
631 | } | ||
632 | |||
633 | static void p4_pmu_enable_all(int added) | ||
634 | { | ||
635 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
636 | int idx; | ||
637 | |||
638 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
639 | struct perf_event *event = cpuc->events[idx]; | ||
640 | if (!test_bit(idx, cpuc->active_mask)) | ||
641 | continue; | ||
642 | p4_pmu_enable_event(event); | ||
643 | } | ||
644 | } | ||
645 | |||
646 | static int p4_pmu_handle_irq(struct pt_regs *regs) | ||
647 | { | ||
648 | struct perf_sample_data data; | ||
649 | struct cpu_hw_events *cpuc; | ||
650 | struct perf_event *event; | ||
651 | struct hw_perf_event *hwc; | ||
652 | int idx, handled = 0; | ||
653 | u64 val; | ||
654 | |||
655 | data.addr = 0; | ||
656 | data.raw = NULL; | ||
657 | |||
658 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
659 | |||
660 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
661 | int overflow; | ||
662 | |||
663 | if (!test_bit(idx, cpuc->active_mask)) { | ||
664 | /* catch in-flight IRQs */ | ||
665 | if (__test_and_clear_bit(idx, cpuc->running)) | ||
666 | handled++; | ||
667 | continue; | ||
668 | } | ||
669 | |||
670 | event = cpuc->events[idx]; | ||
671 | hwc = &event->hw; | ||
672 | |||
673 | WARN_ON_ONCE(hwc->idx != idx); | ||
674 | |||
675 | /* it might be an unflagged overflow */ | ||
676 | overflow = p4_pmu_clear_cccr_ovf(hwc); | ||
677 | |||
678 | val = x86_perf_event_update(event); | ||
679 | if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) | ||
680 | continue; | ||
681 | |||
682 | handled += overflow; | ||
683 | |||
684 | /* event overflow for sure */ | ||
685 | data.period = event->hw.last_period; | ||
686 | |||
687 | if (!x86_perf_event_set_period(event)) | ||
688 | continue; | ||
689 | if (perf_event_overflow(event, 1, &data, regs)) | ||
690 | p4_pmu_disable_event(event); | ||
691 | } | ||
692 | |||
693 | if (handled) { | ||
694 | /* p4 quirk: unmask it again */ | ||
695 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | ||
696 | inc_irq_stat(apic_perf_irqs); | ||
697 | } | ||
698 | |||
699 | return handled; | ||
700 | } | ||
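The overflow test in the handler above leans on how the counters are programmed rather than on a hardware flag: x86_perf_event_set_period() presets each counter to the negated sampling period, so until the counter wraps past zero its top bit (bit cntval_bits - 1, i.e. bit 39 of the 40-bit P4 counters) stays set. The following is an illustrative, userspace-only sketch of that check with made-up values for the period and the counter width; it is not part of the patch.

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const unsigned int cntval_bits = 40;              /* as in p4_pmu below */
        const uint64_t mask = (1ULL << cntval_bits) - 1;
        const uint64_t period = 100000;                   /* made-up sample period */
        uint64_t val = (0 - period) & mask;               /* counter preset to -period */

        /* halfway through the period: top bit still set -> no overflow yet */
        val = (val + 50000) & mask;
        printf("bit%u=%d\n", cntval_bits - 1, (int)((val >> (cntval_bits - 1)) & 1));

        /* counter wrapped past zero: top bit cleared -> overflow happened */
        val = (val + 60000) & mask;
        printf("bit%u=%d\n", cntval_bits - 1, (int)((val >> (cntval_bits - 1)) & 1));
        return 0;
    }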
701 | |||
702 | /* | ||
703 | * swap the thread-specific fields according to the thread | ||
704 | * we are going to run on | ||
705 | */ | ||
706 | static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) | ||
707 | { | ||
708 | u32 escr, cccr; | ||
709 | |||
710 | /* | ||
711 | * either we are lucky and continue on the same cpu, or there is no HT support | ||
712 | */ | ||
713 | if (!p4_should_swap_ts(hwc->config, cpu)) | ||
714 | return; | ||
715 | |||
716 | /* | ||
717 | * the event has migrated from another logical | ||
718 | * cpu, so we need to swap the thread-specific flags | ||
719 | */ | ||
720 | |||
721 | escr = p4_config_unpack_escr(hwc->config); | ||
722 | cccr = p4_config_unpack_cccr(hwc->config); | ||
723 | |||
724 | if (p4_ht_thread(cpu)) { | ||
725 | cccr &= ~P4_CCCR_OVF_PMI_T0; | ||
726 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
727 | if (escr & P4_ESCR_T0_OS) { | ||
728 | escr &= ~P4_ESCR_T0_OS; | ||
729 | escr |= P4_ESCR_T1_OS; | ||
730 | } | ||
731 | if (escr & P4_ESCR_T0_USR) { | ||
732 | escr &= ~P4_ESCR_T0_USR; | ||
733 | escr |= P4_ESCR_T1_USR; | ||
734 | } | ||
735 | hwc->config = p4_config_pack_escr(escr); | ||
736 | hwc->config |= p4_config_pack_cccr(cccr); | ||
737 | hwc->config |= P4_CONFIG_HT; | ||
738 | } else { | ||
739 | cccr &= ~P4_CCCR_OVF_PMI_T1; | ||
740 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
741 | if (escr & P4_ESCR_T1_OS) { | ||
742 | escr &= ~P4_ESCR_T1_OS; | ||
743 | escr |= P4_ESCR_T0_OS; | ||
744 | } | ||
745 | if (escr & P4_ESCR_T1_USR) { | ||
746 | escr &= ~P4_ESCR_T1_USR; | ||
747 | escr |= P4_ESCR_T0_USR; | ||
748 | } | ||
749 | hwc->config = p4_config_pack_escr(escr); | ||
750 | hwc->config |= p4_config_pack_cccr(cccr); | ||
751 | hwc->config &= ~P4_CONFIG_HT; | ||
752 | } | ||
753 | } | ||
754 | |||
755 | /* | ||
756 | * ESCR address hashing is tricky: ESCRs are not sequential | ||
757 | * in memory, but they all start from MSR_P4_BSU_ESCR0 (0x03a0) | ||
758 | * and their address offsets lie in the range [0xa0,0xe1], | ||
759 | * | ||
760 | * so we end up with a ~70% filled hash table | ||
761 | */ | ||
762 | |||
763 | #define P4_ESCR_MSR_BASE 0x000003a0 | ||
764 | #define P4_ESCR_MSR_MAX 0x000003e1 | ||
765 | #define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1) | ||
766 | #define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE) | ||
767 | #define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr | ||
768 | |||
769 | static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = { | ||
770 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0), | ||
771 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1), | ||
772 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0), | ||
773 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1), | ||
774 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0), | ||
775 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1), | ||
776 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0), | ||
777 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1), | ||
778 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2), | ||
779 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3), | ||
780 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4), | ||
781 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5), | ||
782 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0), | ||
783 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1), | ||
784 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0), | ||
785 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1), | ||
786 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0), | ||
787 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1), | ||
788 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0), | ||
789 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1), | ||
790 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0), | ||
791 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1), | ||
792 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0), | ||
793 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1), | ||
794 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0), | ||
795 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1), | ||
796 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0), | ||
797 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1), | ||
798 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0), | ||
799 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1), | ||
800 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0), | ||
801 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1), | ||
802 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0), | ||
803 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1), | ||
804 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0), | ||
805 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1), | ||
806 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0), | ||
807 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1), | ||
808 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0), | ||
809 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1), | ||
810 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0), | ||
811 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1), | ||
812 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0), | ||
813 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1), | ||
814 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0), | ||
815 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1), | ||
816 | }; | ||
817 | |||
818 | static int p4_get_escr_idx(unsigned int addr) | ||
819 | { | ||
820 | unsigned int idx = P4_ESCR_MSR_IDX(addr); | ||
821 | |||
822 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || | ||
823 | !p4_escr_table[idx] || | ||
824 | p4_escr_table[idx] != addr)) { | ||
825 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); | ||
826 | return -1; | ||
827 | } | ||
828 | |||
829 | return idx; | ||
830 | } | ||
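Despite the "hash" wording in the comment above, p4_escr_table is effectively a direct-mapped lookup: the index is just the MSR address minus P4_ESCR_MSR_BASE, and the slot must hold that same address back, which is how stray addresses get rejected. Below is a minimal, userspace-only sketch of the same scheme, not part of the patch, with only the lowest and highest addresses from the comment above populated for illustration.

    #include <stdio.h>

    #define ESCR_MSR_BASE        0x000003a0   /* MSR_P4_BSU_ESCR0, per the comment above */
    #define ESCR_MSR_MAX         0x000003e1
    #define ESCR_MSR_TABLE_SIZE  (ESCR_MSR_MAX - ESCR_MSR_BASE + 1)
    #define ESCR_MSR_IDX(msr)    ((msr) - ESCR_MSR_BASE)

    static const unsigned int escr_table[ESCR_MSR_TABLE_SIZE] = {
        [ESCR_MSR_IDX(0x3a0)] = 0x3a0,        /* lowest ESCR address  */
        [ESCR_MSR_IDX(0x3e1)] = 0x3e1,        /* highest ESCR address */
    };

    static int escr_idx(unsigned int addr)
    {
        unsigned int idx = ESCR_MSR_IDX(addr);

        /* reject anything outside the table or pointing at an empty slot */
        if (idx >= ESCR_MSR_TABLE_SIZE || escr_table[idx] != addr)
            return -1;
        return idx;
    }

    int main(void)
    {
        printf("%d\n", escr_idx(0x3a0));      /* 0  */
        printf("%d\n", escr_idx(0x3e1));      /* 65 */
        printf("%d\n", escr_idx(0x1234));     /* -1: not an ESCR */
        return 0;
    }

The in-kernel table above populates 46 of the 66 possible slots, which is where the "~70% filled" figure in the comment comes from.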
831 | |||
832 | static int p4_next_cntr(int thread, unsigned long *used_mask, | ||
833 | struct p4_event_bind *bind) | ||
834 | { | ||
835 | int i, j; | ||
836 | |||
837 | for (i = 0; i < P4_CNTR_LIMIT; i++) { | ||
838 | j = bind->cntr[thread][i]; | ||
839 | if (j != -1 && !test_bit(j, used_mask)) | ||
840 | return j; | ||
841 | } | ||
842 | |||
843 | return -1; | ||
844 | } | ||
845 | |||
846 | static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | ||
847 | { | ||
848 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
849 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; | ||
850 | int cpu = smp_processor_id(); | ||
851 | struct hw_perf_event *hwc; | ||
852 | struct p4_event_bind *bind; | ||
853 | unsigned int i, thread, num; | ||
854 | int cntr_idx, escr_idx; | ||
855 | |||
856 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
857 | bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); | ||
858 | |||
859 | for (i = 0, num = n; i < n; i++, num--) { | ||
860 | |||
861 | hwc = &cpuc->event_list[i]->hw; | ||
862 | thread = p4_ht_thread(cpu); | ||
863 | bind = p4_config_get_bind(hwc->config); | ||
864 | escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); | ||
865 | if (unlikely(escr_idx == -1)) | ||
866 | goto done; | ||
867 | |||
868 | if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { | ||
869 | cntr_idx = hwc->idx; | ||
870 | if (assign) | ||
871 | assign[i] = hwc->idx; | ||
872 | goto reserve; | ||
873 | } | ||
874 | |||
875 | cntr_idx = p4_next_cntr(thread, used_mask, bind); | ||
876 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) | ||
877 | goto done; | ||
878 | |||
879 | p4_pmu_swap_config_ts(hwc, cpu); | ||
880 | if (assign) | ||
881 | assign[i] = cntr_idx; | ||
882 | reserve: | ||
883 | set_bit(cntr_idx, used_mask); | ||
884 | set_bit(escr_idx, escr_mask); | ||
885 | } | ||
886 | |||
887 | done: | ||
888 | return num ? -ENOSPC : 0; | ||
889 | } | ||
890 | |||
891 | static __initconst const struct x86_pmu p4_pmu = { | ||
892 | .name = "Netburst P4/Xeon", | ||
893 | .handle_irq = p4_pmu_handle_irq, | ||
894 | .disable_all = p4_pmu_disable_all, | ||
895 | .enable_all = p4_pmu_enable_all, | ||
896 | .enable = p4_pmu_enable_event, | ||
897 | .disable = p4_pmu_disable_event, | ||
898 | .eventsel = MSR_P4_BPU_CCCR0, | ||
899 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
900 | .event_map = p4_pmu_event_map, | ||
901 | .max_events = ARRAY_SIZE(p4_general_events), | ||
902 | .get_event_constraints = x86_get_event_constraints, | ||
903 | /* | ||
904 | * If HT is disabled we may need to use all | ||
905 | * ARCH_P4_MAX_CCCR counters simultaneously, | ||
906 | * though for the moment we leave it restricted, assuming | ||
907 | * HT is on | ||
908 | */ | ||
909 | .num_counters = ARCH_P4_MAX_CCCR, | ||
910 | .apic = 1, | ||
911 | .cntval_bits = 40, | ||
912 | .cntval_mask = (1ULL << 40) - 1, | ||
913 | .max_period = (1ULL << 39) - 1, | ||
914 | .hw_config = p4_hw_config, | ||
915 | .schedule_events = p4_pmu_schedule_events, | ||
916 | /* | ||
917 | * This handles erratum N15 in Intel doc 249199-029: | ||
918 | * the counter may not be updated correctly on write, | ||
919 | * so we need a second write operation to do the trick | ||
920 | * (the official workaround didn't work) | ||
921 | * | ||
922 | * the idea is taken from the OProfile code | ||
923 | */ | ||
924 | .perfctr_second_write = 1, | ||
925 | }; | ||
926 | |||
927 | static __init int p4_pmu_init(void) | ||
928 | { | ||
929 | unsigned int low, high; | ||
930 | |||
931 | /* If we get stripped -- indexing fails */ | ||
932 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | ||
933 | |||
934 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | ||
935 | if (!(low & (1 << 7))) { | ||
936 | pr_cont("unsupported Netburst CPU model %d ", | ||
937 | boot_cpu_data.x86_model); | ||
938 | return -ENODEV; | ||
939 | } | ||
940 | |||
941 | memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, | ||
942 | sizeof(hw_cache_event_ids)); | ||
943 | |||
944 | pr_cont("Netburst events, "); | ||
945 | |||
946 | x86_pmu = p4_pmu; | ||
947 | |||
948 | return 0; | ||
949 | } | ||
950 | |||
951 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index a330485d14da..34ba07be2cda 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event) | |||
27 | */ | 27 | */ |
28 | #define P6_NOP_EVENT 0x0000002EULL | 28 | #define P6_NOP_EVENT 0x0000002EULL |
29 | 29 | ||
30 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
31 | { | ||
32 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
33 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
34 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
35 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
36 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
37 | |||
38 | #define P6_EVNTSEL_MASK \ | ||
39 | (P6_EVNTSEL_EVENT_MASK | \ | ||
40 | P6_EVNTSEL_UNIT_MASK | \ | ||
41 | P6_EVNTSEL_EDGE_MASK | \ | ||
42 | P6_EVNTSEL_INV_MASK | \ | ||
43 | P6_EVNTSEL_REG_MASK) | ||
44 | |||
45 | return hw_event & P6_EVNTSEL_MASK; | ||
46 | } | ||
47 | |||
48 | static struct event_constraint p6_event_constraints[] = | 30 | static struct event_constraint p6_event_constraints[] = |
49 | { | 31 | { |
50 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | 32 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ |
@@ -66,7 +48,7 @@ static void p6_pmu_disable_all(void) | |||
66 | wrmsrl(MSR_P6_EVNTSEL0, val); | 48 | wrmsrl(MSR_P6_EVNTSEL0, val); |
67 | } | 49 | } |
68 | 50 | ||
69 | static void p6_pmu_enable_all(void) | 51 | static void p6_pmu_enable_all(int added) |
70 | { | 52 | { |
71 | unsigned long val; | 53 | unsigned long val; |
72 | 54 | ||
@@ -102,22 +84,23 @@ static void p6_pmu_enable_event(struct perf_event *event) | |||
102 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | 84 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); |
103 | } | 85 | } |
104 | 86 | ||
105 | static __initconst struct x86_pmu p6_pmu = { | 87 | static __initconst const struct x86_pmu p6_pmu = { |
106 | .name = "p6", | 88 | .name = "p6", |
107 | .handle_irq = x86_pmu_handle_irq, | 89 | .handle_irq = x86_pmu_handle_irq, |
108 | .disable_all = p6_pmu_disable_all, | 90 | .disable_all = p6_pmu_disable_all, |
109 | .enable_all = p6_pmu_enable_all, | 91 | .enable_all = p6_pmu_enable_all, |
110 | .enable = p6_pmu_enable_event, | 92 | .enable = p6_pmu_enable_event, |
111 | .disable = p6_pmu_disable_event, | 93 | .disable = p6_pmu_disable_event, |
94 | .hw_config = x86_pmu_hw_config, | ||
95 | .schedule_events = x86_schedule_events, | ||
112 | .eventsel = MSR_P6_EVNTSEL0, | 96 | .eventsel = MSR_P6_EVNTSEL0, |
113 | .perfctr = MSR_P6_PERFCTR0, | 97 | .perfctr = MSR_P6_PERFCTR0, |
114 | .event_map = p6_pmu_event_map, | 98 | .event_map = p6_pmu_event_map, |
115 | .raw_event = p6_pmu_raw_event, | ||
116 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | 99 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), |
117 | .apic = 1, | 100 | .apic = 1, |
118 | .max_period = (1ULL << 31) - 1, | 101 | .max_period = (1ULL << 31) - 1, |
119 | .version = 0, | 102 | .version = 0, |
120 | .num_events = 2, | 103 | .num_counters = 2, |
121 | /* | 104 | /* |
122 | * Events have 40 bits implemented. However they are designed such | 105 | * Events have 40 bits implemented. However they are designed such |
123 | * that bits [32-39] are sign extensions of bit 31. As such the | 106 | * that bits [32-39] are sign extensions of bit 31. As such the |
@@ -125,8 +108,8 @@ static __initconst struct x86_pmu p6_pmu = { | |||
125 | * | 108 | * |
126 | * See IA-32 Intel Architecture Software developer manual Vol 3B | 109 | * See IA-32 Intel Architecture Software developer manual Vol 3B |
127 | */ | 110 | */ |
128 | .event_bits = 32, | 111 | .cntval_bits = 32, |
129 | .event_mask = (1ULL << 32) - 1, | 112 | .cntval_mask = (1ULL << 32) - 1, |
130 | .get_event_constraints = x86_get_event_constraints, | 113 | .get_event_constraints = x86_get_event_constraints, |
131 | .event_constraints = p6_event_constraints, | 114 | .event_constraints = p6_event_constraints, |
132 | }; | 115 | }; |
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c new file mode 100644 index 000000000000..d49079515122 --- /dev/null +++ b/arch/x86/kernel/cpu/scattered.c | |||
@@ -0,0 +1,64 @@ | |||
1 | /* | ||
2 | * Routines to identify additional cpu features that are scattered in | ||
3 | * cpuid space. | ||
4 | */ | ||
5 | #include <linux/cpu.h> | ||
6 | |||
7 | #include <asm/pat.h> | ||
8 | #include <asm/processor.h> | ||
9 | |||
10 | #include <asm/apic.h> | ||
11 | |||
12 | struct cpuid_bit { | ||
13 | u16 feature; | ||
14 | u8 reg; | ||
15 | u8 bit; | ||
16 | u32 level; | ||
17 | u32 sub_leaf; | ||
18 | }; | ||
19 | |||
20 | enum cpuid_regs { | ||
21 | CR_EAX = 0, | ||
22 | CR_ECX, | ||
23 | CR_EDX, | ||
24 | CR_EBX | ||
25 | }; | ||
26 | |||
27 | void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | ||
28 | { | ||
29 | u32 max_level; | ||
30 | u32 regs[4]; | ||
31 | const struct cpuid_bit *cb; | ||
32 | |||
33 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | ||
34 | { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, | ||
35 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, | ||
36 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, | ||
37 | { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, | ||
38 | { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, | ||
39 | { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, | ||
40 | { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, | ||
41 | { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, | ||
42 | { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, | ||
43 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, | ||
44 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, | ||
45 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, | ||
46 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, | ||
47 | { 0, 0, 0, 0, 0 } | ||
48 | }; | ||
49 | |||
50 | for (cb = cpuid_bits; cb->feature; cb++) { | ||
51 | |||
52 | /* Verify that the level is valid */ | ||
53 | max_level = cpuid_eax(cb->level & 0xffff0000); | ||
54 | if (max_level < cb->level || | ||
55 | max_level > (cb->level | 0xffff)) | ||
56 | continue; | ||
57 | |||
58 | cpuid_count(cb->level, cb->sub_leaf, ®s[CR_EAX], | ||
59 | ®s[CR_EBX], ®s[CR_ECX], ®s[CR_EDX]); | ||
60 | |||
61 | if (regs[cb->reg] & (1 << cb->bit)) | ||
62 | set_cpu_cap(c, cb->feature); | ||
63 | } | ||
64 | } | ||
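The level check in the loop above is easy to misread: cb->level & 0xffff0000 selects the base leaf of the requested range (0x00000000 for standard leaves, 0x80000000 for extended ones), and the maximum leaf the CPU reports there must be at least cb->level while still staying inside that 0x10000-wide range. The following is a small userspace sketch of just that predicate, not part of the patch; the max-leaf values are assumptions chosen for illustration.

    #include <stdio.h>
    #include <stdint.h>

    /* Same validity test as in the loop above, pulled out as a helper. */
    static int leaf_in_range(uint32_t max_level, uint32_t level)
    {
        return !(max_level < level || max_level > (level | 0xffff));
    }

    int main(void)
    {
        /* assume cpuid_eax(0x80000000) reported 0x8000001b on this CPU */
        printf("%d\n", leaf_in_range(0x8000001b, 0x8000000a)); /* 1: leaf exists     */
        printf("%d\n", leaf_in_range(0x8000001b, 0x8000001c)); /* 0: beyond max leaf */
        /* assume cpuid_eax(0) reported 0x0000000b */
        printf("%d\n", leaf_in_range(0x0000000b, 0x00000006)); /* 1: basic leaf 6 ok */
        return 0;
    }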
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/topology.c index 97ad79cdf688..4397e987a1cf 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/topology.c | |||
@@ -1,60 +1,14 @@ | |||
1 | /* | 1 | /* |
2 | * Routines to indentify additional cpu features that are scattered in | 2 | * Check for extended topology enumeration cpuid leaf 0xb and if it |
3 | * cpuid space. | 3 | * exists, use it for populating initial_apicid and cpu topology |
4 | * detection. | ||
4 | */ | 5 | */ |
5 | #include <linux/cpu.h> | ||
6 | 6 | ||
7 | #include <linux/cpu.h> | ||
8 | #include <asm/apic.h> | ||
7 | #include <asm/pat.h> | 9 | #include <asm/pat.h> |
8 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
9 | 11 | ||
10 | #include <asm/apic.h> | ||
11 | |||
12 | struct cpuid_bit { | ||
13 | u16 feature; | ||
14 | u8 reg; | ||
15 | u8 bit; | ||
16 | u32 level; | ||
17 | }; | ||
18 | |||
19 | enum cpuid_regs { | ||
20 | CR_EAX = 0, | ||
21 | CR_ECX, | ||
22 | CR_EDX, | ||
23 | CR_EBX | ||
24 | }; | ||
25 | |||
26 | void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | ||
27 | { | ||
28 | u32 max_level; | ||
29 | u32 regs[4]; | ||
30 | const struct cpuid_bit *cb; | ||
31 | |||
32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | ||
33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, | ||
34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, | ||
35 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, | ||
36 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, | ||
37 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | ||
38 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | ||
39 | { 0, 0, 0, 0 } | ||
40 | }; | ||
41 | |||
42 | for (cb = cpuid_bits; cb->feature; cb++) { | ||
43 | |||
44 | /* Verify that the level is valid */ | ||
45 | max_level = cpuid_eax(cb->level & 0xffff0000); | ||
46 | if (max_level < cb->level || | ||
47 | max_level > (cb->level | 0xffff)) | ||
48 | continue; | ||
49 | |||
50 | cpuid(cb->level, ®s[CR_EAX], ®s[CR_EBX], | ||
51 | ®s[CR_ECX], ®s[CR_EDX]); | ||
52 | |||
53 | if (regs[cb->reg] & (1 << cb->bit)) | ||
54 | set_cpu_cap(c, cb->feature); | ||
55 | } | ||
56 | } | ||
57 | |||
58 | /* leaf 0xb SMT level */ | 12 | /* leaf 0xb SMT level */ |
59 | #define SMT_LEVEL 0 | 13 | #define SMT_LEVEL 0 |
60 | 14 | ||
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index dfdb4dba2320..227b0448960d 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -24,8 +24,8 @@ | |||
24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
27 | #include <asm/vmware.h> | ||
28 | #include <asm/x86_init.h> | 27 | #include <asm/x86_init.h> |
28 | #include <asm/hypervisor.h> | ||
29 | 29 | ||
30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | 30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 |
31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | 31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 |
@@ -51,7 +51,7 @@ static inline int __vmware_platform(void) | |||
51 | 51 | ||
52 | static unsigned long vmware_get_tsc_khz(void) | 52 | static unsigned long vmware_get_tsc_khz(void) |
53 | { | 53 | { |
54 | uint64_t tsc_hz; | 54 | uint64_t tsc_hz, lpj; |
55 | uint32_t eax, ebx, ecx, edx; | 55 | uint32_t eax, ebx, ecx, edx; |
56 | 56 | ||
57 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | 57 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); |
@@ -62,10 +62,17 @@ static unsigned long vmware_get_tsc_khz(void) | |||
62 | printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", | 62 | printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", |
63 | (unsigned long) tsc_hz / 1000, | 63 | (unsigned long) tsc_hz / 1000, |
64 | (unsigned long) tsc_hz % 1000); | 64 | (unsigned long) tsc_hz % 1000); |
65 | |||
66 | if (!preset_lpj) { | ||
67 | lpj = ((u64)tsc_hz * 1000); | ||
68 | do_div(lpj, HZ); | ||
69 | preset_lpj = lpj; | ||
70 | } | ||
71 | |||
65 | return tsc_hz; | 72 | return tsc_hz; |
66 | } | 73 | } |
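The preset_lpj lines added above convert the hypervisor-reported TSC rate (in kHz) into TSC cycles per jiffy, so the delay-loop calibration can start from a known value instead of being measured at boot. As a rough userspace illustration, not part of the patch, with made-up numbers (both the tick rate and the TSC frequency below are assumptions):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        const uint64_t hz = 250;             /* assumed CONFIG_HZ            */
        uint64_t tsc_khz = 2666667;          /* e.g. a 2.666667 GHz TSC      */
        uint64_t lpj = tsc_khz * 1000 / hz;  /* TSC cycles per jiffy         */

        printf("preset_lpj = %llu\n", (unsigned long long)lpj); /* 10666668 */
        return 0;
    }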
67 | 74 | ||
68 | void __init vmware_platform_setup(void) | 75 | static void __init vmware_platform_setup(void) |
69 | { | 76 | { |
70 | uint32_t eax, ebx, ecx, edx; | 77 | uint32_t eax, ebx, ecx, edx; |
71 | 78 | ||
@@ -83,26 +90,22 @@ void __init vmware_platform_setup(void) | |||
83 | * serial key should be enough, as this will always have a VMware | 90 | * serial key should be enough, as this will always have a VMware |
84 | * specific string when running under VMware hypervisor. | 91 | * specific string when running under VMware hypervisor. |
85 | */ | 92 | */ |
86 | int vmware_platform(void) | 93 | static bool __init vmware_platform(void) |
87 | { | 94 | { |
88 | if (cpu_has_hypervisor) { | 95 | if (cpu_has_hypervisor) { |
89 | unsigned int eax, ebx, ecx, edx; | 96 | unsigned int eax; |
90 | char hyper_vendor_id[13]; | 97 | unsigned int hyper_vendor_id[3]; |
91 | 98 | ||
92 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx); | 99 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], |
93 | memcpy(hyper_vendor_id + 0, &ebx, 4); | 100 | &hyper_vendor_id[1], &hyper_vendor_id[2]); |
94 | memcpy(hyper_vendor_id + 4, &ecx, 4); | 101 | if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) |
95 | memcpy(hyper_vendor_id + 8, &edx, 4); | 102 | return true; |
96 | hyper_vendor_id[12] = '\0'; | ||
97 | if (!strcmp(hyper_vendor_id, "VMwareVMware")) | ||
98 | return 1; | ||
99 | } else if (dmi_available && dmi_name_in_serial("VMware") && | 103 | } else if (dmi_available && dmi_name_in_serial("VMware") && |
100 | __vmware_platform()) | 104 | __vmware_platform()) |
101 | return 1; | 105 | return true; |
102 | 106 | ||
103 | return 0; | 107 | return false; |
104 | } | 108 | } |
105 | EXPORT_SYMBOL(vmware_platform); | ||
106 | 109 | ||
107 | /* | 110 | /* |
108 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | 111 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. |
@@ -116,8 +119,16 @@ EXPORT_SYMBOL(vmware_platform); | |||
116 | * so that the kernel could just trust the hypervisor with providing a | 119 | * so that the kernel could just trust the hypervisor with providing a |
117 | * reliable virtual TSC that is suitable for timekeeping. | 120 | * reliable virtual TSC that is suitable for timekeeping. |
118 | */ | 121 | */ |
119 | void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c) | 122 | static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) |
120 | { | 123 | { |
121 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 124 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
122 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | 125 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); |
123 | } | 126 | } |
127 | |||
128 | const __refconst struct hypervisor_x86 x86_hyper_vmware = { | ||
129 | .name = "VMware", | ||
130 | .detect = vmware_platform, | ||
131 | .set_cpu_features = vmware_set_cpu_features, | ||
132 | .init_platform = vmware_platform_setup, | ||
133 | }; | ||
134 | EXPORT_SYMBOL(x86_hyper_vmware); | ||