path: root/arch/x86/kernel/cpu
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--  arch/x86/kernel/cpu/amd.c                        50
-rw-r--r--  arch/x86/kernel/cpu/centaur.c                   272
-rw-r--r--  arch/x86/kernel/cpu/common.c                     18
-rw-r--r--  arch/x86/kernel/cpu/intel.c                      53
-rw-r--r--  arch/x86/kernel/cpu/match.c                      42
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c                   84
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c                 54
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h                  9
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c           11
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c   557
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h     5
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c              34
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c              48
13 files changed, 705 insertions, 532 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c67ffa686064..ce8b8ff0e0ef 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -218,7 +218,7 @@ static void amd_k7_smp_check(struct cpuinfo_x86 *c)
 	 */
 	WARN_ONCE(1, "WARNING: This combination of AMD"
		" processors is not suitable for SMP.\n");
-	add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE);
+	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
 }
 
 static void init_amd_k7(struct cpuinfo_x86 *c)
@@ -233,9 +233,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
 	if (c->x86_model >= 6 && c->x86_model <= 10) {
 		if (!cpu_has(c, X86_FEATURE_XMM)) {
 			printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
-			rdmsr(MSR_K7_HWCR, l, h);
-			l &= ~0x00008000;
-			wrmsr(MSR_K7_HWCR, l, h);
+			msr_clear_bit(MSR_K7_HWCR, 15);
 			set_cpu_cap(c, X86_FEATURE_XMM);
 		}
 	}
@@ -509,14 +507,8 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 #endif
 
 	/* F16h erratum 793, CVE-2013-6885 */
-	if (c->x86 == 0x16 && c->x86_model <= 0xf) {
-		u64 val;
-
-		rdmsrl(MSR_AMD64_LS_CFG, val);
-		if (!(val & BIT(15)))
-			wrmsrl(MSR_AMD64_LS_CFG, val | BIT(15));
-	}
-
+	if (c->x86 == 0x16 && c->x86_model <= 0xf)
+		msr_set_bit(MSR_AMD64_LS_CFG, 15);
 }
 
 static const int amd_erratum_383[];
@@ -536,11 +528,8 @@ static void init_amd(struct cpuinfo_x86 *c)
 	 * Errata 63 for SH-B3 steppings
 	 * Errata 122 for all steppings (F+ have it disabled by default)
 	 */
-	if (c->x86 == 0xf) {
-		rdmsrl(MSR_K7_HWCR, value);
-		value |= 1 << 6;
-		wrmsrl(MSR_K7_HWCR, value);
-	}
+	if (c->x86 == 0xf)
+		msr_set_bit(MSR_K7_HWCR, 6);
 #endif
 
 	early_init_amd(c);
@@ -623,14 +612,11 @@ static void init_amd(struct cpuinfo_x86 *c)
 	    (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) &&
 	    !cpu_has(c, X86_FEATURE_TOPOEXT)) {
 
-		if (!rdmsrl_safe(0xc0011005, &value)) {
-			value |= 1ULL << 54;
-			wrmsrl_safe(0xc0011005, value);
+		if (msr_set_bit(0xc0011005, 54) > 0) {
 			rdmsrl(0xc0011005, value);
-			if (value & (1ULL << 54)) {
+			if (value & BIT_64(54)) {
 				set_cpu_cap(c, X86_FEATURE_TOPOEXT);
-				printk(KERN_INFO FW_INFO "CPU: Re-enabling "
-				       "disabled Topology Extensions Support\n");
+				pr_info(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
 			}
 		}
 	}
@@ -709,19 +695,12 @@ static void init_amd(struct cpuinfo_x86 *c)
 	 * Disable GART TLB Walk Errors on Fam10h. We do this here
 	 * because this is always needed when GART is enabled, even in a
 	 * kernel which has no MCE support built in.
-	 * BIOS should disable GartTlbWlk Errors themself. If
-	 * it doesn't do it here as suggested by the BKDG.
+	 * BIOS should disable GartTlbWlk Errors already. If
+	 * it doesn't, do it here as suggested by the BKDG.
 	 *
 	 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
 	 */
-	u64 mask;
-	int err;
-
-	err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask);
-	if (err == 0) {
-		mask |= (1 << 10);
-		wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask);
-	}
+	msr_set_bit(MSR_AMD64_MCx_MASK(4), 10);
 
 	/*
 	 * On family 10h BIOS may not have properly enabled WC+ support,
@@ -733,10 +712,7 @@ static void init_amd(struct cpuinfo_x86 *c)
 	 * NOTE: we want to use the _safe accessors so as not to #GP kvm
 	 * guests on older kvm hosts.
 	 */
-
-	rdmsrl_safe(MSR_AMD64_BU_CFG2, &value);
-	value &= ~(1ULL << 24);
-	wrmsrl_safe(MSR_AMD64_BU_CFG2, value);
+	msr_clear_bit(MSR_AMD64_BU_CFG2, 24);
 
 	if (cpu_has_amd_erratum(c, amd_erratum_383))
 		set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
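
The conversion above repeatedly collapses an open-coded read-modify-write of an MSR into a single msr_set_bit()/msr_clear_bit() call whose return value distinguishes "failed", "already in the requested state" and "actually changed". A minimal sketch of a helper with that same convention, built only from the rdmsrl_safe()/wrmsrl_safe() accessors already used in this file (illustrative only, not the kernel's real implementation):

static int msr_toggle_bit_sketch(u32 msr, u8 bit, bool set)
{
	u64 val, mask = 1ULL << bit;

	if (rdmsrl_safe(msr, &val))
		return -EIO;		/* MSR could not be read */
	if (!!(val & mask) == set)
		return 0;		/* already in the requested state */
	val = set ? (val | mask) : (val & ~mask);
	if (wrmsrl_safe(msr, val))
		return -EIO;
	return 1;			/* bit really changed */
}

With that convention, the TOPOEXT hunk's "msr_set_bit(0xc0011005, 54) > 0" only re-probes the feature when the write actually took effect.
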
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 8779edab684e..d8fba5c15fbd 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -8,236 +8,6 @@
8 8
9#include "cpu.h" 9#include "cpu.h"
10 10
11#ifdef CONFIG_X86_OOSTORE
12
13static u32 power2(u32 x)
14{
15 u32 s = 1;
16
17 while (s <= x)
18 s <<= 1;
19
20 return s >>= 1;
21}
22
23
24/*
25 * Set up an actual MCR
26 */
27static void centaur_mcr_insert(int reg, u32 base, u32 size, int key)
28{
29 u32 lo, hi;
30
31 hi = base & ~0xFFF;
32 lo = ~(size-1); /* Size is a power of 2 so this makes a mask */
33 lo &= ~0xFFF; /* Remove the ctrl value bits */
34 lo |= key; /* Attribute we wish to set */
35 wrmsr(reg+MSR_IDT_MCR0, lo, hi);
36 mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */
37}
38
39/*
40 * Figure what we can cover with MCR's
41 *
42 * Shortcut: We know you can't put 4Gig of RAM on a winchip
43 */
44static u32 ramtop(void)
45{
46 u32 clip = 0xFFFFFFFFUL;
47 u32 top = 0;
48 int i;
49
50 for (i = 0; i < e820.nr_map; i++) {
51 unsigned long start, end;
52
53 if (e820.map[i].addr > 0xFFFFFFFFUL)
54 continue;
55 /*
56 * Don't MCR over reserved space. Ignore the ISA hole
57 * we frob around that catastrophe already
58 */
59 if (e820.map[i].type == E820_RESERVED) {
60 if (e820.map[i].addr >= 0x100000UL &&
61 e820.map[i].addr < clip)
62 clip = e820.map[i].addr;
63 continue;
64 }
65 start = e820.map[i].addr;
66 end = e820.map[i].addr + e820.map[i].size;
67 if (start >= end)
68 continue;
69 if (end > top)
70 top = end;
71 }
72 /*
73 * Everything below 'top' should be RAM except for the ISA hole.
74 * Because of the limited MCR's we want to map NV/ACPI into our
75 * MCR range for gunk in RAM
76 *
77 * Clip might cause us to MCR insufficient RAM but that is an
78 * acceptable failure mode and should only bite obscure boxes with
79 * a VESA hole at 15Mb
80 *
81 * The second case Clip sometimes kicks in is when the EBDA is marked
82 * as reserved. Again we fail safe with reasonable results
83 */
84 if (top > clip)
85 top = clip;
86
87 return top;
88}
89
90/*
91 * Compute a set of MCR's to give maximum coverage
92 */
93static int centaur_mcr_compute(int nr, int key)
94{
95 u32 mem = ramtop();
96 u32 root = power2(mem);
97 u32 base = root;
98 u32 top = root;
99 u32 floor = 0;
100 int ct = 0;
101
102 while (ct < nr) {
103 u32 fspace = 0;
104 u32 high;
105 u32 low;
106
107 /*
108 * Find the largest block we will fill going upwards
109 */
110 high = power2(mem-top);
111
112 /*
113 * Find the largest block we will fill going downwards
114 */
115 low = base/2;
116
117 /*
118 * Don't fill below 1Mb going downwards as there
119 * is an ISA hole in the way.
120 */
121 if (base <= 1024*1024)
122 low = 0;
123
124 /*
125 * See how much space we could cover by filling below
126 * the ISA hole
127 */
128
129 if (floor == 0)
130 fspace = 512*1024;
131 else if (floor == 512*1024)
132 fspace = 128*1024;
133
134 /* And forget ROM space */
135
136 /*
137 * Now install the largest coverage we get
138 */
139 if (fspace > high && fspace > low) {
140 centaur_mcr_insert(ct, floor, fspace, key);
141 floor += fspace;
142 } else if (high > low) {
143 centaur_mcr_insert(ct, top, high, key);
144 top += high;
145 } else if (low > 0) {
146 base -= low;
147 centaur_mcr_insert(ct, base, low, key);
148 } else
149 break;
150 ct++;
151 }
152 /*
153 * We loaded ct values. We now need to set the mask. The caller
154 * must do this bit.
155 */
156 return ct;
157}
158
159static void centaur_create_optimal_mcr(void)
160{
161 int used;
162 int i;
163
164 /*
165 * Allocate up to 6 mcrs to mark as much of ram as possible
166 * as write combining and weak write ordered.
167 *
168 * To experiment with: Linux never uses stack operations for
169 * mmio spaces so we could globally enable stack operation wc
170 *
171 * Load the registers with type 31 - full write combining, all
172 * writes weakly ordered.
173 */
174 used = centaur_mcr_compute(6, 31);
175
176 /*
177 * Wipe unused MCRs
178 */
179 for (i = used; i < 8; i++)
180 wrmsr(MSR_IDT_MCR0+i, 0, 0);
181}
182
183static void winchip2_create_optimal_mcr(void)
184{
185 u32 lo, hi;
186 int used;
187 int i;
188
189 /*
190 * Allocate up to 6 mcrs to mark as much of ram as possible
191 * as write combining, weak store ordered.
192 *
193 * Load the registers with type 25
194 * 8 - weak write ordering
195 * 16 - weak read ordering
196 * 1 - write combining
197 */
198 used = centaur_mcr_compute(6, 25);
199
200 /*
201 * Mark the registers we are using.
202 */
203 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
204 for (i = 0; i < used; i++)
205 lo |= 1<<(9+i);
206 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
207
208 /*
209 * Wipe unused MCRs
210 */
211
212 for (i = used; i < 8; i++)
213 wrmsr(MSR_IDT_MCR0+i, 0, 0);
214}
215
216/*
217 * Handle the MCR key on the Winchip 2.
218 */
219static void winchip2_unprotect_mcr(void)
220{
221 u32 lo, hi;
222 u32 key;
223
224 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
225 lo &= ~0x1C0; /* blank bits 8-6 */
226 key = (lo>>17) & 7;
227 lo |= key<<6; /* replace with unlock key */
228 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
229}
230
231static void winchip2_protect_mcr(void)
232{
233 u32 lo, hi;
234
235 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
236 lo &= ~0x1C0; /* blank bits 8-6 */
237 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
238}
239#endif /* CONFIG_X86_OOSTORE */
240
241#define ACE_PRESENT (1 << 6) 11#define ACE_PRESENT (1 << 6)
242#define ACE_ENABLED (1 << 7) 12#define ACE_ENABLED (1 << 7)
243#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ 13#define ACE_FCR (1 << 28) /* MSR_VIA_FCR */
@@ -362,20 +132,6 @@ static void init_centaur(struct cpuinfo_x86 *c)
362 fcr_clr = DPDC; 132 fcr_clr = DPDC;
363 printk(KERN_NOTICE "Disabling bugged TSC.\n"); 133 printk(KERN_NOTICE "Disabling bugged TSC.\n");
364 clear_cpu_cap(c, X86_FEATURE_TSC); 134 clear_cpu_cap(c, X86_FEATURE_TSC);
365#ifdef CONFIG_X86_OOSTORE
366 centaur_create_optimal_mcr();
367 /*
368 * Enable:
369 * write combining on non-stack, non-string
370 * write combining on string, all types
371 * weak write ordering
372 *
373 * The C6 original lacks weak read order
374 *
375 * Note 0x120 is write only on Winchip 1
376 */
377 wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
378#endif
379 break; 135 break;
380 case 8: 136 case 8:
381 switch (c->x86_mask) { 137 switch (c->x86_mask) {
@@ -392,40 +148,12 @@ static void init_centaur(struct cpuinfo_x86 *c)
392 fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| 148 fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
393 E2MMX|EAMD3D; 149 E2MMX|EAMD3D;
394 fcr_clr = DPDC; 150 fcr_clr = DPDC;
395#ifdef CONFIG_X86_OOSTORE
396 winchip2_unprotect_mcr();
397 winchip2_create_optimal_mcr();
398 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
399 /*
400 * Enable:
401 * write combining on non-stack, non-string
402 * write combining on string, all types
403 * weak write ordering
404 */
405 lo |= 31;
406 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
407 winchip2_protect_mcr();
408#endif
409 break; 151 break;
410 case 9: 152 case 9:
411 name = "3"; 153 name = "3";
412 fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| 154 fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
413 E2MMX|EAMD3D; 155 E2MMX|EAMD3D;
414 fcr_clr = DPDC; 156 fcr_clr = DPDC;
415#ifdef CONFIG_X86_OOSTORE
416 winchip2_unprotect_mcr();
417 winchip2_create_optimal_mcr();
418 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
419 /*
420 * Enable:
421 * write combining on non-stack, non-string
422 * write combining on string, all types
423 * weak write ordering
424 */
425 lo |= 31;
426 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
427 winchip2_protect_mcr();
428#endif
429 break; 157 break;
430 default: 158 default:
431 name = "??"; 159 name = "??";
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 24b6fd10625a..a135239badb7 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -284,8 +284,13 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 	raw_local_save_flags(eflags);
 	BUG_ON(eflags & X86_EFLAGS_AC);
 
-	if (cpu_has(c, X86_FEATURE_SMAP))
+	if (cpu_has(c, X86_FEATURE_SMAP)) {
+#ifdef CONFIG_X86_SMAP
 		set_in_cr4(X86_CR4_SMAP);
+#else
+		clear_in_cr4(X86_CR4_SMAP);
+#endif
+	}
 }
 
 /*
@@ -1020,7 +1025,8 @@ __setup("show_msr=", setup_show_msr);
 
 static __init int setup_noclflush(char *arg)
 {
-	setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
+	setup_clear_cpu_cap(X86_FEATURE_CLFLUSH);
+	setup_clear_cpu_cap(X86_FEATURE_CLFLUSHOPT);
 	return 1;
 }
 __setup("noclflush", setup_noclflush);
@@ -1073,6 +1079,10 @@ static __init int setup_disablecpuid(char *arg)
 }
 __setup("clearcpuid=", setup_disablecpuid);
 
+DEFINE_PER_CPU(unsigned long, kernel_stack) =
+	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(kernel_stack);
+
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
 struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
@@ -1089,10 +1099,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
 	&init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
 
-DEFINE_PER_CPU(unsigned long, kernel_stack) =
-	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
-EXPORT_PER_CPU_SYMBOL(kernel_stack);
-
 DEFINE_PER_CPU(char *, irq_stack_ptr) =
 	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
 
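
The setup_smap() change makes the CR4 bit follow the kernel configuration rather than only the CPUID bit. An equivalent shape using IS_ENABLED(), shown purely as an illustration of the same logic (the patch itself uses the #ifdef form above):

static __always_inline void setup_smap_sketch(struct cpuinfo_x86 *c)
{
	if (!cpu_has(c, X86_FEATURE_SMAP))
		return;

	if (IS_ENABLED(CONFIG_X86_SMAP))
		set_in_cr4(X86_CR4_SMAP);	/* enforce supervisor-mode access prevention */
	else
		clear_in_cr4(X86_CR4_SMAP);	/* kernel built without SMAP support */
}
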
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ea56e7c9abe6..a80029035bf2 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -31,11 +31,8 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 
 	/* Unmask CPUID levels if masked: */
 	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
-		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
-
-		if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
-			misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
-			wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
+		if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
+				  MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
 			c->cpuid_level = cpuid_eax(0);
 			get_cpu_cap(c);
 		}
@@ -129,16 +126,10 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 	 * Ingo Molnar reported a Pentium D (model 6) and a Xeon
 	 * (model 2) with the same problem.
 	 */
-	if (c->x86 == 15) {
-		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
-
-		if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) {
-			printk(KERN_INFO "kmemcheck: Disabling fast string operations\n");
-
-			misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING;
-			wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
-		}
-	}
+	if (c->x86 == 15)
+		if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
+				  MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0)
+			pr_info("kmemcheck: Disabling fast string operations\n");
 #endif
 
 	/*
@@ -195,10 +186,16 @@ static void intel_smp_check(struct cpuinfo_x86 *c)
 	}
 }
 
-static void intel_workarounds(struct cpuinfo_x86 *c)
-{
-	unsigned long lo, hi;
+static int forcepae;
+static int __init forcepae_setup(char *__unused)
+{
+	forcepae = 1;
+	return 1;
+}
+__setup("forcepae", forcepae_setup);
 
+static void intel_workarounds(struct cpuinfo_x86 *c)
+{
 #ifdef CONFIG_X86_F00F_BUG
 	/*
 	 * All current models of Pentium and Pentium with MMX technology CPUs
@@ -225,16 +222,26 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
 		clear_cpu_cap(c, X86_FEATURE_SEP);
 
 	/*
+	 * PAE CPUID issue: many Pentium M report no PAE but may have a
+	 * functionally usable PAE implementation.
+	 * Forcefully enable PAE if kernel parameter "forcepae" is present.
+	 */
+	if (forcepae) {
+		printk(KERN_WARNING "PAE forced!\n");
+		set_cpu_cap(c, X86_FEATURE_PAE);
+		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE);
+	}
+
+	/*
 	 * P4 Xeon errata 037 workaround.
 	 * Hardware prefetcher may cause stale data to be loaded into the cache.
 	 */
 	if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
-		rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
-		if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) {
-			printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
-			printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
-			lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
-			wrmsr(MSR_IA32_MISC_ENABLE, lo, hi);
+		if (msr_set_bit(MSR_IA32_MISC_ENABLE,
+				MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
+		    > 0) {
+			pr_info("CPU: C0 stepping P4 Xeon detected.\n");
+			pr_info("CPU: Disabling hardware prefetching (Errata 037)\n");
 		}
 	}
 
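
The forcepae hunk is the standard early boot-parameter idiom: a __setup() handler flips a static flag, and the CPU setup path consults it later. A compact sketch of the same idiom with a hypothetical parameter name (returning 1 tells the old-style parameter parser the option was handled):

static int force_example_feature;

static int __init force_example_setup(char *__unused)
{
	force_example_feature = 1;	/* remember the request for later */
	return 1;			/* option consumed */
}
__setup("force_example", force_example_setup);
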
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 36565373af87..afa9f0d487ea 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -47,45 +47,3 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
47 return NULL; 47 return NULL;
48} 48}
49EXPORT_SYMBOL(x86_match_cpu); 49EXPORT_SYMBOL(x86_match_cpu);
50
51ssize_t arch_print_cpu_modalias(struct device *dev,
52 struct device_attribute *attr,
53 char *bufptr)
54{
55 int size = PAGE_SIZE;
56 int i, n;
57 char *buf = bufptr;
58
59 n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:"
60 "model:%04X:feature:",
61 boot_cpu_data.x86_vendor,
62 boot_cpu_data.x86,
63 boot_cpu_data.x86_model);
64 size -= n;
65 buf += n;
66 size -= 1;
67 for (i = 0; i < NCAPINTS*32; i++) {
68 if (boot_cpu_has(i)) {
69 n = snprintf(buf, size, ",%04X", i);
70 if (n >= size) {
71 WARN(1, "x86 features overflow page\n");
72 break;
73 }
74 size -= n;
75 buf += n;
76 }
77 }
78 *buf++ = '\n';
79 return buf - bufptr;
80}
81
82int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env)
83{
84 char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
85 if (buf) {
86 arch_print_cpu_modalias(NULL, NULL, buf);
87 add_uevent_var(env, "MODALIAS=%s", buf);
88 kfree(buf);
89 }
90 return 0;
91}
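
With the modalias/uevent helpers gone, x86_match_cpu() remains this file's public interface. A hedged usage sketch under the 3.x-era struct x86_cpu_id layout; the table contents are invented for illustration:

static const struct x86_cpu_id example_cpu_ids[] = {
	{ .vendor = X86_VENDOR_INTEL, .family = 6,
	  .model = X86_MODEL_ANY, .feature = X86_FEATURE_APERFMPERF },
	{}	/* terminating empty entry */
};

static int __init example_driver_init(void)
{
	if (!x86_match_cpu(example_cpu_ids))
		return -ENODEV;		/* boot CPU not in the table */
	/* ... continue with driver setup ... */
	return 0;
}
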
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 9f7ca266864a..76f98fe5b35c 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -17,6 +17,7 @@
17#include <linux/hardirq.h> 17#include <linux/hardirq.h>
18#include <linux/efi.h> 18#include <linux/efi.h>
19#include <linux/interrupt.h> 19#include <linux/interrupt.h>
20#include <linux/irq.h>
20#include <asm/processor.h> 21#include <asm/processor.h>
21#include <asm/hypervisor.h> 22#include <asm/hypervisor.h>
22#include <asm/hyperv.h> 23#include <asm/hyperv.h>
@@ -26,10 +27,50 @@
26#include <asm/irq_regs.h> 27#include <asm/irq_regs.h>
27#include <asm/i8259.h> 28#include <asm/i8259.h>
28#include <asm/apic.h> 29#include <asm/apic.h>
30#include <asm/timer.h>
29 31
30struct ms_hyperv_info ms_hyperv; 32struct ms_hyperv_info ms_hyperv;
31EXPORT_SYMBOL_GPL(ms_hyperv); 33EXPORT_SYMBOL_GPL(ms_hyperv);
32 34
35#if IS_ENABLED(CONFIG_HYPERV)
36static void (*vmbus_handler)(void);
37
38void hyperv_vector_handler(struct pt_regs *regs)
39{
40 struct pt_regs *old_regs = set_irq_regs(regs);
41
42 irq_enter();
43 exit_idle();
44
45 inc_irq_stat(irq_hv_callback_count);
46 if (vmbus_handler)
47 vmbus_handler();
48
49 irq_exit();
50 set_irq_regs(old_regs);
51}
52
53void hv_setup_vmbus_irq(void (*handler)(void))
54{
55 vmbus_handler = handler;
56 /*
57 * Setup the IDT for hypervisor callback. Prevent reallocation
58 * at module reload.
59 */
60 if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors))
61 alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
62 hyperv_callback_vector);
63}
64
65void hv_remove_vmbus_irq(void)
66{
67 /* We have no way to deallocate the interrupt gate */
68 vmbus_handler = NULL;
69}
70EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
71EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
72#endif
73
33static uint32_t __init ms_hyperv_platform(void) 74static uint32_t __init ms_hyperv_platform(void)
34{ 75{
35 u32 eax; 76 u32 eax;
@@ -105,6 +146,11 @@ static void __init ms_hyperv_init_platform(void)
105 146
106 if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) 147 if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
107 clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); 148 clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
149
150#ifdef CONFIG_X86_IO_APIC
151 no_timer_check = 1;
152#endif
153
108} 154}
109 155
110const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { 156const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
@@ -113,41 +159,3 @@ const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
113 .init_platform = ms_hyperv_init_platform, 159 .init_platform = ms_hyperv_init_platform,
114}; 160};
115EXPORT_SYMBOL(x86_hyper_ms_hyperv); 161EXPORT_SYMBOL(x86_hyper_ms_hyperv);
116
117#if IS_ENABLED(CONFIG_HYPERV)
118static int vmbus_irq = -1;
119static irq_handler_t vmbus_isr;
120
121void hv_register_vmbus_handler(int irq, irq_handler_t handler)
122{
123 /*
124 * Setup the IDT for hypervisor callback.
125 */
126 alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
127
128 vmbus_irq = irq;
129 vmbus_isr = handler;
130}
131
132void hyperv_vector_handler(struct pt_regs *regs)
133{
134 struct pt_regs *old_regs = set_irq_regs(regs);
135 struct irq_desc *desc;
136
137 irq_enter();
138 exit_idle();
139
140 desc = irq_to_desc(vmbus_irq);
141
142 if (desc)
143 generic_handle_irq_desc(vmbus_irq, desc);
144
145 irq_exit();
146 set_irq_regs(old_regs);
147}
148#else
149void hv_register_vmbus_handler(int irq, irq_handler_t handler)
150{
151}
152#endif
153EXPORT_SYMBOL_GPL(hv_register_vmbus_handler);
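
The new hv_setup_vmbus_irq()/hv_remove_vmbus_irq() pair replaces the per-IRQ registration with a plain function pointer plus a one-time IDT gate allocation. A sketch of how a VMBus-style client would consume the new hooks; the client-side names are hypothetical:

static void example_vmbus_isr(void)
{
	/* ack the message page, schedule channel work, ... */
}

static int __init example_vmbus_probe(void)
{
	hv_setup_vmbus_irq(example_vmbus_isr);	/* also allocates the callback vector once */
	return 0;
}

static void example_vmbus_remove(void)
{
	hv_remove_vmbus_irq();	/* handler cleared; the gate itself stays allocated */
}
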
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b88645191fe5..ae407f7226c8 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -892,7 +892,6 @@ static void x86_pmu_enable(struct pmu *pmu)
 	 * hw_perf_group_sched_in() or x86_pmu_enable()
 	 *
 	 * step1: save events moving to new counters
-	 * step2: reprogram moved events into new counters
 	 */
 	for (i = 0; i < n_running; i++) {
 		event = cpuc->event_list[i];
@@ -918,6 +917,9 @@ static void x86_pmu_enable(struct pmu *pmu)
 		x86_pmu_stop(event, PERF_EF_UPDATE);
 	}
 
+	/*
+	 * step2: reprogram moved events into new counters
+	 */
 	for (i = 0; i < cpuc->n_events; i++) {
 		event = cpuc->event_list[i];
 		hwc = &event->hw;
@@ -1043,7 +1045,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
 	/*
 	 * If group events scheduling transaction was started,
 	 * skip the schedulability test here, it will be performed
-	 * at commit time (->commit_txn) as a whole
+	 * at commit time (->commit_txn) as a whole.
 	 */
 	if (cpuc->group_flag & PERF_EVENT_TXN)
 		goto done_collect;
@@ -1058,6 +1060,10 @@ static int x86_pmu_add(struct perf_event *event, int flags)
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
 done_collect:
+	/*
+	 * Commit the collect_events() state. See x86_pmu_del() and
+	 * x86_pmu_*_txn().
+	 */
 	cpuc->n_events = n;
 	cpuc->n_added += n - n0;
 	cpuc->n_txn += n - n0;
@@ -1183,25 +1189,38 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	 * If we're called during a txn, we don't need to do anything.
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
+	 *
+	 * XXX assumes any ->del() called during a TXN will only be on
+	 * an event added during that same TXN.
 	 */
 	if (cpuc->group_flag & PERF_EVENT_TXN)
 		return;
 
+	/*
+	 * Not a TXN, therefore cleanup properly.
+	 */
 	x86_pmu_stop(event, PERF_EF_UPDATE);
 
 	for (i = 0; i < cpuc->n_events; i++) {
-		if (event == cpuc->event_list[i]) {
+		if (event == cpuc->event_list[i])
+			break;
+	}
 
-			if (x86_pmu.put_event_constraints)
-				x86_pmu.put_event_constraints(cpuc, event);
+	if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
+		return;
 
-			while (++i < cpuc->n_events)
-				cpuc->event_list[i-1] = cpuc->event_list[i];
+	/* If we have a newly added event; make sure to decrease n_added. */
+	if (i >= cpuc->n_events - cpuc->n_added)
+		--cpuc->n_added;
+
+	if (x86_pmu.put_event_constraints)
+		x86_pmu.put_event_constraints(cpuc, event);
+
+	/* Delete the array entry. */
+	while (++i < cpuc->n_events)
+		cpuc->event_list[i-1] = cpuc->event_list[i];
+	--cpuc->n_events;
 
-			--cpuc->n_events;
-			break;
-		}
-	}
 	perf_event_update_userpage(event);
 }
 
@@ -1521,6 +1540,8 @@ static int __init init_hw_perf_events(void)
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
+	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
 	for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
 		quirk->func();
 
@@ -1534,7 +1555,6 @@ static int __init init_hw_perf_events(void)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
 				   0, x86_pmu.num_counters, 0, 0);
 
-	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
 	x86_pmu_format_group.attrs = x86_pmu.format_attrs;
 
 	if (x86_pmu.event_attrs)
@@ -1594,7 +1614,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
 {
 	__this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
 	/*
-	 * Truncate the collected events.
+	 * Truncate collected array by the number of events added in this
+	 * transaction. See x86_pmu_add() and x86_pmu_*_txn().
 	 */
 	__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
 	__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
@@ -1605,6 +1626,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
  * Commit group events scheduling transaction
  * Perform the group schedulability test as a whole
  * Return 0 if success
+ *
+ * Does not cancel the transaction on failure; expects the caller to do this.
  */
 static int x86_pmu_commit_txn(struct pmu *pmu)
 {
@@ -1820,9 +1843,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
 	if (ret)
 		return ret;
 
+	if (x86_pmu.attr_rdpmc_broken)
+		return -ENOTSUPP;
+
 	if (!!val != !!x86_pmu.attr_rdpmc) {
 		x86_pmu.attr_rdpmc = !!val;
-		smp_call_function(change_rdpmc, (void *)val, 1);
+		on_each_cpu(change_rdpmc, (void *)val, 1);
 	}
 
 	return count;
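
Stripped of the perf bookkeeping, the reworked x86_pmu_del() is the usual "find the slot, then close the gap" operation on a packed array. A minimal standalone sketch of that idiom (not kernel code):

static int remove_entry(void **list, int *nr, void *item)
{
	int i;

	for (i = 0; i < *nr; i++)
		if (list[i] == item)
			break;
	if (i == *nr)
		return -1;		/* ->del() without a matching ->add() */

	while (++i < *nr)		/* shift the tail down by one slot */
		list[i - 1] = list[i];
	--*nr;
	return 0;
}

The extra kernel-side detail is the n_added adjustment: if the removed event was one of the not-yet-enabled entries at the tail, n_added must shrink too, otherwise a later cancel_txn() would truncate an event that is still live.
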
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index c1a861829d81..3b2f9bdd974b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -130,9 +130,11 @@ struct cpu_hw_events {
 	unsigned long	running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	int		enabled;
 
-	int		n_events;
-	int		n_added;
-	int		n_txn;
+	int		n_events; /* the # of events in the below arrays */
+	int		n_added;  /* the # last events in the below arrays;
+				     they've never been enabled yet */
+	int		n_txn;    /* the # last events in the below arrays;
+				     added in the current transaction */
 	int		assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64		tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
@@ -409,6 +411,7 @@ struct x86_pmu {
 	/*
 	 * sysfs attrs
 	 */
+	int		attr_rdpmc_broken;
 	int		attr_rdpmc;
 	struct attribute **format_attrs;
 	struct attribute **event_attrs;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 0fa4f242f050..aa333d966886 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1361,10 +1361,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
-	if (!status) {
-		intel_pmu_enable_all(0);
-		return handled;
-	}
+	if (!status)
+		goto done;
 
 	loops = 0;
 again:
@@ -2310,10 +2308,7 @@ __init int intel_pmu_init(void)
 	if (version > 1)
 		x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
 
-	/*
-	 * v2 and above have a perf capabilities MSR
-	 */
-	if (version > 1) {
+	if (boot_cpu_has(X86_FEATURE_PDCM)) {
 		u64 capabilities;
 
 		rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
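
The hunk above keys the MSR_IA32_PERF_CAPABILITIES read off the PDCM CPUID feature instead of the PMU version, which matters on hypervisors that report a v2+ PMU but do not implement the MSR. A more defensive variant of the same idea, sketched with the _safe accessor (the patch itself keeps the plain rdmsrl() under the PDCM check):

static u64 read_perf_capabilities_sketch(void)
{
	u64 caps = 0;

	if (boot_cpu_has(X86_FEATURE_PDCM) &&
	    rdmsrl_safe(MSR_IA32_PERF_CAPABILITIES, &caps) == 0)
		return caps;

	return 0;	/* behave as if no extra capabilities exist */
}
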
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 29c248799ced..bd2253d40cff 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -66,6 +66,47 @@ DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
66DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); 66DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
67DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); 67DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
68 68
69static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
70static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
71static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
72static void uncore_pmu_event_read(struct perf_event *event);
73
74static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
75{
76 return container_of(event->pmu, struct intel_uncore_pmu, pmu);
77}
78
79static struct intel_uncore_box *
80uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
81{
82 struct intel_uncore_box *box;
83
84 box = *per_cpu_ptr(pmu->box, cpu);
85 if (box)
86 return box;
87
88 raw_spin_lock(&uncore_box_lock);
89 list_for_each_entry(box, &pmu->box_list, list) {
90 if (box->phys_id == topology_physical_package_id(cpu)) {
91 atomic_inc(&box->refcnt);
92 *per_cpu_ptr(pmu->box, cpu) = box;
93 break;
94 }
95 }
96 raw_spin_unlock(&uncore_box_lock);
97
98 return *per_cpu_ptr(pmu->box, cpu);
99}
100
101static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
102{
103 /*
104 * perf core schedules event on the basis of cpu, uncore events are
105 * collected by one of the cpus inside a physical package.
106 */
107 return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
108}
109
69static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) 110static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
70{ 111{
71 u64 count; 112 u64 count;
@@ -501,8 +542,11 @@ static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
501 SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, 542 SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
502 SNBEP_CBO_PMON_CTL_TID_EN, 0x1), 543 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
503 SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), 544 SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
545 SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6),
504 SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), 546 SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
547 SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6),
505 SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), 548 SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
549 SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6),
506 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), 550 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
507 SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), 551 SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
508 SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), 552 SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
@@ -1178,10 +1222,15 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
1178 SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, 1222 SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
1179 SNBEP_CBO_PMON_CTL_TID_EN, 0x1), 1223 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
1180 SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), 1224 SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
1225 SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
1226 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
1227 SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
1181 SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), 1228 SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
1229 SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc),
1182 SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), 1230 SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
1231 SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc),
1183 SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), 1232 SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
1184 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), 1233 SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc),
1185 SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10), 1234 SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10),
1186 SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), 1235 SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10),
1187 SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), 1236 SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10),
@@ -1631,6 +1680,349 @@ static struct intel_uncore_type *snb_msr_uncores[] = {
1631 &snb_uncore_cbox, 1680 &snb_uncore_cbox,
1632 NULL, 1681 NULL,
1633}; 1682};
1683
1684enum {
1685 SNB_PCI_UNCORE_IMC,
1686};
1687
1688static struct uncore_event_desc snb_uncore_imc_events[] = {
1689 INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"),
1690 INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"),
1691 INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"),
1692
1693 INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"),
1694 INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"),
1695 INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"),
1696
1697 { /* end: all zeroes */ },
1698};
1699
1700#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff
1701#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48
1702
1703/* page size multiple covering all config regs */
1704#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000
1705
1706#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1
1707#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050
1708#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2
1709#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054
1710#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE
1711
1712static struct attribute *snb_uncore_imc_formats_attr[] = {
1713 &format_attr_event.attr,
1714 NULL,
1715};
1716
1717static struct attribute_group snb_uncore_imc_format_group = {
1718 .name = "format",
1719 .attrs = snb_uncore_imc_formats_attr,
1720};
1721
1722static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
1723{
1724 struct pci_dev *pdev = box->pci_dev;
1725 int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET;
1726 resource_size_t addr;
1727 u32 pci_dword;
1728
1729 pci_read_config_dword(pdev, where, &pci_dword);
1730 addr = pci_dword;
1731
1732#ifdef CONFIG_PHYS_ADDR_T_64BIT
1733 pci_read_config_dword(pdev, where + 4, &pci_dword);
1734 addr |= ((resource_size_t)pci_dword << 32);
1735#endif
1736
1737 addr &= ~(PAGE_SIZE - 1);
1738
1739 box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
1740 box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
1741}
1742
1743static void snb_uncore_imc_enable_box(struct intel_uncore_box *box)
1744{}
1745
1746static void snb_uncore_imc_disable_box(struct intel_uncore_box *box)
1747{}
1748
1749static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event)
1750{}
1751
1752static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event)
1753{}
1754
1755static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
1756{
1757 struct hw_perf_event *hwc = &event->hw;
1758
1759 return (u64)*(unsigned int *)(box->io_addr + hwc->event_base);
1760}
1761
1762/*
1763 * custom event_init() function because we define our own fixed, free
1764 * running counters, so we do not want to conflict with generic uncore
1765 * logic. Also simplifies processing
1766 */
1767static int snb_uncore_imc_event_init(struct perf_event *event)
1768{
1769 struct intel_uncore_pmu *pmu;
1770 struct intel_uncore_box *box;
1771 struct hw_perf_event *hwc = &event->hw;
1772 u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK;
1773 int idx, base;
1774
1775 if (event->attr.type != event->pmu->type)
1776 return -ENOENT;
1777
1778 pmu = uncore_event_to_pmu(event);
1779 /* no device found for this pmu */
1780 if (pmu->func_id < 0)
1781 return -ENOENT;
1782
1783 /* Sampling not supported yet */
1784 if (hwc->sample_period)
1785 return -EINVAL;
1786
1787 /* unsupported modes and filters */
1788 if (event->attr.exclude_user ||
1789 event->attr.exclude_kernel ||
1790 event->attr.exclude_hv ||
1791 event->attr.exclude_idle ||
1792 event->attr.exclude_host ||
1793 event->attr.exclude_guest ||
1794 event->attr.sample_period) /* no sampling */
1795 return -EINVAL;
1796
1797 /*
1798 * Place all uncore events for a particular physical package
1799 * onto a single cpu
1800 */
1801 if (event->cpu < 0)
1802 return -EINVAL;
1803
1804 /* check only supported bits are set */
1805 if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK)
1806 return -EINVAL;
1807
1808 box = uncore_pmu_to_box(pmu, event->cpu);
1809 if (!box || box->cpu < 0)
1810 return -EINVAL;
1811
1812 event->cpu = box->cpu;
1813
1814 event->hw.idx = -1;
1815 event->hw.last_tag = ~0ULL;
1816 event->hw.extra_reg.idx = EXTRA_REG_NONE;
1817 event->hw.branch_reg.idx = EXTRA_REG_NONE;
1818 /*
1819 * check event is known (whitelist, determines counter)
1820 */
1821 switch (cfg) {
1822 case SNB_UNCORE_PCI_IMC_DATA_READS:
1823 base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
1824 idx = UNCORE_PMC_IDX_FIXED;
1825 break;
1826 case SNB_UNCORE_PCI_IMC_DATA_WRITES:
1827 base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
1828 idx = UNCORE_PMC_IDX_FIXED + 1;
1829 break;
1830 default:
1831 return -EINVAL;
1832 }
1833
1834 /* must be done before validate_group */
1835 event->hw.event_base = base;
1836 event->hw.config = cfg;
1837 event->hw.idx = idx;
1838
1839 /* no group validation needed, we have free running counters */
1840
1841 return 0;
1842}
1843
1844static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event)
1845{
1846 return 0;
1847}
1848
1849static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
1850{
1851 struct intel_uncore_box *box = uncore_event_to_box(event);
1852 u64 count;
1853
1854 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1855 return;
1856
1857 event->hw.state = 0;
1858 box->n_active++;
1859
1860 list_add_tail(&event->active_entry, &box->active_list);
1861
1862 count = snb_uncore_imc_read_counter(box, event);
1863 local64_set(&event->hw.prev_count, count);
1864
1865 if (box->n_active == 1)
1866 uncore_pmu_start_hrtimer(box);
1867}
1868
1869static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
1870{
1871 struct intel_uncore_box *box = uncore_event_to_box(event);
1872 struct hw_perf_event *hwc = &event->hw;
1873
1874 if (!(hwc->state & PERF_HES_STOPPED)) {
1875 box->n_active--;
1876
1877 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1878 hwc->state |= PERF_HES_STOPPED;
1879
1880 list_del(&event->active_entry);
1881
1882 if (box->n_active == 0)
1883 uncore_pmu_cancel_hrtimer(box);
1884 }
1885
1886 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1887 /*
1888 * Drain the remaining delta count out of a event
1889 * that we are disabling:
1890 */
1891 uncore_perf_event_update(box, event);
1892 hwc->state |= PERF_HES_UPTODATE;
1893 }
1894}
1895
1896static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
1897{
1898 struct intel_uncore_box *box = uncore_event_to_box(event);
1899 struct hw_perf_event *hwc = &event->hw;
1900
1901 if (!box)
1902 return -ENODEV;
1903
1904 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
1905 if (!(flags & PERF_EF_START))
1906 hwc->state |= PERF_HES_ARCH;
1907
1908 snb_uncore_imc_event_start(event, 0);
1909
1910 box->n_events++;
1911
1912 return 0;
1913}
1914
1915static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
1916{
1917 struct intel_uncore_box *box = uncore_event_to_box(event);
1918 int i;
1919
1920 snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
1921
1922 for (i = 0; i < box->n_events; i++) {
1923 if (event == box->event_list[i]) {
1924 --box->n_events;
1925 break;
1926 }
1927 }
1928}
1929
1930static int snb_pci2phy_map_init(int devid)
1931{
1932 struct pci_dev *dev = NULL;
1933 int bus;
1934
1935 dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev);
1936 if (!dev)
1937 return -ENOTTY;
1938
1939 bus = dev->bus->number;
1940
1941 pcibus_to_physid[bus] = 0;
1942
1943 pci_dev_put(dev);
1944
1945 return 0;
1946}
1947
1948static struct pmu snb_uncore_imc_pmu = {
1949 .task_ctx_nr = perf_invalid_context,
1950 .event_init = snb_uncore_imc_event_init,
1951 .add = snb_uncore_imc_event_add,
1952 .del = snb_uncore_imc_event_del,
1953 .start = snb_uncore_imc_event_start,
1954 .stop = snb_uncore_imc_event_stop,
1955 .read = uncore_pmu_event_read,
1956};
1957
1958static struct intel_uncore_ops snb_uncore_imc_ops = {
1959 .init_box = snb_uncore_imc_init_box,
1960 .enable_box = snb_uncore_imc_enable_box,
1961 .disable_box = snb_uncore_imc_disable_box,
1962 .disable_event = snb_uncore_imc_disable_event,
1963 .enable_event = snb_uncore_imc_enable_event,
1964 .hw_config = snb_uncore_imc_hw_config,
1965 .read_counter = snb_uncore_imc_read_counter,
1966};
1967
1968static struct intel_uncore_type snb_uncore_imc = {
1969 .name = "imc",
1970 .num_counters = 2,
1971 .num_boxes = 1,
1972 .fixed_ctr_bits = 32,
1973 .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE,
1974 .event_descs = snb_uncore_imc_events,
1975 .format_group = &snb_uncore_imc_format_group,
1976 .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
1977 .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK,
1978 .ops = &snb_uncore_imc_ops,
1979 .pmu = &snb_uncore_imc_pmu,
1980};
1981
1982static struct intel_uncore_type *snb_pci_uncores[] = {
1983 [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc,
1984 NULL,
1985};
1986
1987static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = {
1988 { /* IMC */
1989 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC),
1990 .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
1991 },
1992 { /* end: all zeroes */ },
1993};
1994
1995static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = {
1996 { /* IMC */
1997 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC),
1998 .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
1999 },
2000 { /* end: all zeroes */ },
2001};
2002
2003static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = {
2004 { /* IMC */
2005 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),
2006 .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
2007 },
2008 { /* end: all zeroes */ },
2009};
2010
2011static struct pci_driver snb_uncore_pci_driver = {
2012 .name = "snb_uncore",
2013 .id_table = snb_uncore_pci_ids,
2014};
2015
2016static struct pci_driver ivb_uncore_pci_driver = {
2017 .name = "ivb_uncore",
2018 .id_table = ivb_uncore_pci_ids,
2019};
2020
2021static struct pci_driver hsw_uncore_pci_driver = {
2022 .name = "hsw_uncore",
2023 .id_table = hsw_uncore_pci_ids,
2024};
2025
1634/* end of Sandy Bridge uncore support */ 2026/* end of Sandy Bridge uncore support */
1635 2027
1636/* Nehalem uncore support */ 2028/* Nehalem uncore support */
@@ -2781,6 +3173,7 @@ again:
 static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
 {
 	struct intel_uncore_box *box;
+	struct perf_event *event;
 	unsigned long flags;
 	int bit;
 
@@ -2793,19 +3186,27 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
 	 */
 	local_irq_save(flags);
 
+	/*
+	 * handle boxes with an active event list as opposed to active
+	 * counters
+	 */
+	list_for_each_entry(event, &box->active_list, active_entry) {
+		uncore_perf_event_update(box, event);
+	}
+
 	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
 		uncore_perf_event_update(box, box->events[bit]);
 
 	local_irq_restore(flags);
 
-	hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL));
+	hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
 	return HRTIMER_RESTART;
 }
 
 static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
 {
 	__hrtimer_start_range_ns(&box->hrtimer,
-			ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0,
+			ns_to_ktime(box->hrtimer_duration), 0,
 			HRTIMER_MODE_REL_PINNED, 0);
 }
 
@@ -2839,43 +3240,12 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
 	box->cpu = -1;
 	box->phys_id = -1;
 
-	return box;
-}
-
-static struct intel_uncore_box *
-uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
-{
-	struct intel_uncore_box *box;
-
-	box = *per_cpu_ptr(pmu->box, cpu);
-	if (box)
-		return box;
+	/* set default hrtimer timeout */
+	box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
 
-	raw_spin_lock(&uncore_box_lock);
-	list_for_each_entry(box, &pmu->box_list, list) {
-		if (box->phys_id == topology_physical_package_id(cpu)) {
-			atomic_inc(&box->refcnt);
-			*per_cpu_ptr(pmu->box, cpu) = box;
-			break;
-		}
-	}
-	raw_spin_unlock(&uncore_box_lock);
+	INIT_LIST_HEAD(&box->active_list);
 
-	return *per_cpu_ptr(pmu->box, cpu);
-}
-
-static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
-{
-	return container_of(event->pmu, struct intel_uncore_pmu, pmu);
-}
-
-static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
-{
-	/*
-	 * perf core schedules event on the basis of cpu, uncore events are
-	 * collected by one of the cpus inside a physical package.
-	 */
-	return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
+	return box;
 }
 
 static int
@@ -3271,16 +3641,21 @@ static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
 {
 	int ret;
 
-	pmu->pmu = (struct pmu) {
-		.attr_groups	= pmu->type->attr_groups,
-		.task_ctx_nr	= perf_invalid_context,
-		.event_init	= uncore_pmu_event_init,
-		.add		= uncore_pmu_event_add,
-		.del		= uncore_pmu_event_del,
-		.start		= uncore_pmu_event_start,
-		.stop		= uncore_pmu_event_stop,
-		.read		= uncore_pmu_event_read,
-	};
+	if (!pmu->type->pmu) {
+		pmu->pmu = (struct pmu) {
+			.attr_groups	= pmu->type->attr_groups,
+			.task_ctx_nr	= perf_invalid_context,
+			.event_init	= uncore_pmu_event_init,
+			.add		= uncore_pmu_event_add,
+			.del		= uncore_pmu_event_del,
+			.start		= uncore_pmu_event_start,
+			.stop		= uncore_pmu_event_stop,
+			.read		= uncore_pmu_event_read,
+		};
+	} else {
+		pmu->pmu = *pmu->type->pmu;
+		pmu->pmu.attr_groups = pmu->type->attr_groups;
+	}
 
 	if (pmu->type->num_boxes == 1) {
 		if (strlen(pmu->type->name) > 0)
@@ -3326,6 +3701,8 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
 	if (!pmus)
 		return -ENOMEM;
 
+	type->pmus = pmus;
+
 	type->unconstrainted = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
 				0, type->num_counters, 0, 0);
@@ -3361,7 +3738,6 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
 	}
 
 	type->pmu_group = &uncore_pmu_attr_group;
-	type->pmus = pmus;
 	return 0;
 fail:
 	uncore_type_exit(type);
@@ -3493,6 +3869,28 @@ static int __init uncore_pci_init(void)
3493 pci_uncores = ivt_pci_uncores; 3869 pci_uncores = ivt_pci_uncores;
3494 uncore_pci_driver = &ivt_uncore_pci_driver; 3870 uncore_pci_driver = &ivt_uncore_pci_driver;
3495 break; 3871 break;
3872 case 42: /* Sandy Bridge */
3873 ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_SNB_IMC);
3874 if (ret)
3875 return ret;
3876 pci_uncores = snb_pci_uncores;
3877 uncore_pci_driver = &snb_uncore_pci_driver;
3878 break;
3879 case 58: /* Ivy Bridge */
3880 ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_IVB_IMC);
3881 if (ret)
3882 return ret;
3883 pci_uncores = snb_pci_uncores;
3884 uncore_pci_driver = &ivb_uncore_pci_driver;
3885 break;
3886 case 60: /* Haswell */
3887 case 69: /* Haswell Celeron */
3888 ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_HSW_IMC);
3889 if (ret)
3890 return ret;
3891 pci_uncores = snb_pci_uncores;
3892 uncore_pci_driver = &hsw_uncore_pci_driver;
3893 break;
3496 default: 3894 default:
3497 return 0; 3895 return 0;
3498 } 3896 }
@@ -3764,7 +4162,7 @@ static void __init uncore_cpu_setup(void *dummy)
 
 static int __init uncore_cpu_init(void)
 {
-	int ret, cpu, max_cores;
+	int ret, max_cores;
 
 	max_cores = boot_cpu_data.x86_max_cores;
 	switch (boot_cpu_data.x86_model) {
@@ -3808,29 +4206,6 @@ static int __init uncore_cpu_init(void)
3808 if (ret) 4206 if (ret)
3809 return ret; 4207 return ret;
3810 4208
3811 get_online_cpus();
3812
3813 for_each_online_cpu(cpu) {
3814 int i, phys_id = topology_physical_package_id(cpu);
3815
3816 for_each_cpu(i, &uncore_cpu_mask) {
3817 if (phys_id == topology_physical_package_id(i)) {
3818 phys_id = -1;
3819 break;
3820 }
3821 }
3822 if (phys_id < 0)
3823 continue;
3824
3825 uncore_cpu_prepare(cpu, phys_id);
3826 uncore_event_init_cpu(cpu);
3827 }
3828 on_each_cpu(uncore_cpu_setup, NULL, 1);
3829
3830 register_cpu_notifier(&uncore_cpu_nb);
3831
3832 put_online_cpus();
3833
3834 return 0; 4209 return 0;
3835} 4210}
3836 4211
@@ -3859,6 +4234,41 @@ static int __init uncore_pmus_register(void)
3859 return 0; 4234 return 0;
3860} 4235}
3861 4236
4237static void __init uncore_cpumask_init(void)
4238{
4239 int cpu;
4240
4241 /*
4242 * only invoke once from msr or pci init code
4243 */
4244 if (!cpumask_empty(&uncore_cpu_mask))
4245 return;
4246
4247 get_online_cpus();
4248
4249 for_each_online_cpu(cpu) {
4250 int i, phys_id = topology_physical_package_id(cpu);
4251
4252 for_each_cpu(i, &uncore_cpu_mask) {
4253 if (phys_id == topology_physical_package_id(i)) {
4254 phys_id = -1;
4255 break;
4256 }
4257 }
4258 if (phys_id < 0)
4259 continue;
4260
4261 uncore_cpu_prepare(cpu, phys_id);
4262 uncore_event_init_cpu(cpu);
4263 }
4264 on_each_cpu(uncore_cpu_setup, NULL, 1);
4265
4266 register_cpu_notifier(&uncore_cpu_nb);
4267
4268 put_online_cpus();
4269}
4270
4271
3862static int __init intel_uncore_init(void) 4272static int __init intel_uncore_init(void)
3863{ 4273{
3864 int ret; 4274 int ret;
@@ -3877,6 +4287,7 @@ static int __init intel_uncore_init(void)
3877 uncore_pci_exit(); 4287 uncore_pci_exit();
3878 goto fail; 4288 goto fail;
3879 } 4289 }
4290 uncore_cpumask_init();
3880 4291
3881 uncore_pmus_register(); 4292 uncore_pmus_register();
3882 return 0; 4293 return 0;
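
The SNB IMC counters added above are free-running and only 32 bits wide, which is why each box now carries its own hrtimer_duration and why the timer walks active_list: deltas have to be folded into the 64-bit perf count before the hardware counter wraps. A minimal sketch of that accumulation, assuming a 32-bit free-running counter:

static void accumulate_free_running(struct perf_event *event, u64 now)
{
	u64 prev, delta;

	prev  = local64_read(&event->hw.prev_count);
	delta = (now - prev) & 0xffffffffULL;	/* modulo-2^32 wraparound */
	local64_set(&event->hw.prev_count, now);
	local64_add(delta, &event->count);
}
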
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index a80ab71a883d..90236f0c94a9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -6,6 +6,7 @@
 
 #define UNCORE_PMU_NAME_LEN		32
 #define UNCORE_PMU_HRTIMER_INTERVAL	(60LL * NSEC_PER_SEC)
+#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC)
 
 #define UNCORE_FIXED_EVENT		0xff
 #define UNCORE_PMC_IDX_MAX_GENERIC	8
@@ -440,6 +441,7 @@ struct intel_uncore_type {
 	struct intel_uncore_ops *ops;
 	struct uncore_event_desc *event_descs;
 	const struct attribute_group *attr_groups[4];
+	struct pmu *pmu; /* for custom pmu ops */
 };
 
 #define pmu_group attr_groups[0]
@@ -488,8 +490,11 @@ struct intel_uncore_box {
 	u64 tags[UNCORE_PMC_IDX_MAX];
 	struct pci_dev *pci_dev;
 	struct intel_uncore_pmu *pmu;
+	u64 hrtimer_duration; /* hrtimer timeout for this box */
 	struct hrtimer hrtimer;
 	struct list_head list;
+	struct list_head active_list;
+	void *io_addr;
 	struct intel_uncore_extra_reg shared_regs[0];
 };
 
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 3486e6660357..5d466b7d8609 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1257,7 +1257,24 @@ again:
1257 pass++; 1257 pass++;
1258 goto again; 1258 goto again;
1259 } 1259 }
1260 1260 /*
1261 * Perf does test runs to see if a whole group can be assigned
1262 * together successfully. There can be multiple rounds of this.
1263 * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config
1264 * bits, such that the next round of group assignments will
1265 * cause the above p4_should_swap_ts to pass instead of fail.
1266 * This leads to counters exclusive to thread0 being used by
1267 * thread1.
1268 *
1269 * Solve this with a cheap hack, reset the idx back to -1 to
1270 * force a new lookup (p4_next_cntr) to get the right counter
1271 * for the right thread.
1272 *
1273 * This probably doesn't comply with the general spirit of how
1274 * perf wants to work, but P4 is special. :-(
1275 */
1276 if (p4_should_swap_ts(hwc->config, cpu))
1277 hwc->idx = -1;
1261 p4_pmu_swap_config_ts(hwc, cpu); 1278 p4_pmu_swap_config_ts(hwc, cpu);
1262 if (assign) 1279 if (assign)
1263 assign[i] = cntr_idx; 1280 assign[i] = cntr_idx;
@@ -1322,6 +1339,7 @@ static __initconst const struct x86_pmu p4_pmu = {
1322__init int p4_pmu_init(void) 1339__init int p4_pmu_init(void)
1323{ 1340{
1324 unsigned int low, high; 1341 unsigned int low, high;
1342 int i, reg;
1325 1343
1326 /* If we get stripped -- indexing fails */ 1344 /* If we get stripped -- indexing fails */
1327 BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); 1345 BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
@@ -1340,5 +1358,19 @@ __init int p4_pmu_init(void)
1340 1358
1341 x86_pmu = p4_pmu; 1359 x86_pmu = p4_pmu;
1342 1360
1361 /*
1362 * Even though the counters are configured to interrupt a particular
1363 * logical processor when an overflow happens, testing has shown that
1364 * on kdump kernels (which uses a single cpu), thread1's counter
1365 * continues to run and will report an NMI on thread0. Due to the
1366 * overflow bug, this leads to a stream of unknown NMIs.
1367 *
1368 * Solve this by zero'ing out the registers to mimic a reset.
1369 */
1370 for (i = 0; i < x86_pmu.num_counters; i++) {
1371 reg = x86_pmu_config_addr(i);
1372 wrmsrl_safe(reg, 0ULL);
1373 }
1374
1343 return 0; 1375 return 0;
1344} 1376}
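
In isolation, the init-time scrubbing added above is just "write zero to every counter-config MSR, tolerating registers that may fault", which is why it uses wrmsrl_safe(). A hedged sketch of the same loop using only symbols that appear in this diff:

static void reset_counter_configs_sketch(int num_counters)
{
	int i;

	for (i = 0; i < num_counters; i++)
		wrmsrl_safe(x86_pmu_config_addr(i), 0ULL);	/* ignore faults on missing MSRs */
}
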
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index b1e2fe115323..7c1a0c07b607 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -231,31 +231,49 @@ static __initconst const struct x86_pmu p6_pmu = {
 
 };
 
+static __init void p6_pmu_rdpmc_quirk(void)
+{
+	if (boot_cpu_data.x86_mask < 9) {
+		/*
+		 * PPro erratum 26; fixed in stepping 9 and above.
+		 */
+		pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n");
+		x86_pmu.attr_rdpmc_broken = 1;
+		x86_pmu.attr_rdpmc = 0;
+	}
+}
+
 __init int p6_pmu_init(void)
 {
+	x86_pmu = p6_pmu;
+
 	switch (boot_cpu_data.x86_model) {
-	case 1:
-	case 3: /* Pentium Pro */
-	case 5:
-	case 6: /* Pentium II */
-	case 7:
-	case 8:
-	case 11: /* Pentium III */
-	case 9:
-	case 13:
-	/* Pentium M */
+	case 1: /* Pentium Pro */
+		x86_add_quirk(p6_pmu_rdpmc_quirk);
+		break;
+
+	case 3: /* Pentium II - Klamath */
+	case 5: /* Pentium II - Deschutes */
+	case 6: /* Pentium II - Mendocino */
 		break;
+
+	case 7: /* Pentium III - Katmai */
+	case 8: /* Pentium III - Coppermine */
+	case 10: /* Pentium III Xeon */
+	case 11: /* Pentium III - Tualatin */
+		break;
+
+	case 9: /* Pentium M - Banias */
+	case 13: /* Pentium M - Dothan */
+		break;
+
 	default:
-		pr_cont("unsupported p6 CPU model %d ",
-			boot_cpu_data.x86_model);
+		pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model);
 		return -ENODEV;
 	}
 
-	x86_pmu = p6_pmu;
-
 	memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
 		sizeof(hw_cache_event_ids));
 
-
 	return 0;
 }
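
The new p6_pmu_rdpmc_quirk() plugs into the generic quirk list that init_hw_perf_events() walks (see the quirk loop in the perf_event.c hunk earlier): x86_add_quirk() queues an __init callback that runs once the right x86_pmu has been installed. A sketch of another, purely hypothetical quirk using the same mechanism:

static __init void example_stepping_quirk(void)
{
	if (boot_cpu_data.x86_mask < 2)		/* hypothetical early stepping */
		x86_pmu.attr_rdpmc_broken = 1;	/* reject writes to the sysfs rdpmc attribute */
}

/* registered from a model case inside a *_pmu_init() function: */
/*	x86_add_quirk(example_stepping_quirk); */
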