path: root/arch/x86/kernel/cpu
author     Ingo Molnar <mingo@elte.hu>  2009-03-13 06:05:58 -0400
committer  Ingo Molnar <mingo@elte.hu>  2009-03-13 06:05:58 -0400
commit     cd80a8142efa3468c2cd9fb52845f334c3220d54 (patch)
tree       919e88994bd3c09b34ce852d0a09bb0655d231d0 /arch/x86/kernel/cpu
parent     641cd4cfcdc71ce01535b31cc4d57d59a1fae1fc (diff)
parent     a98fe7f3425c6b4e90de16f8da63b0429a8fed08 (diff)
Merge branch 'x86/core' into core/ipi
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--  arch/x86/kernel/cpu/Makefile                            2
-rw-r--r--  arch/x86/kernel/cpu/addon_cpuid_features.c             54
-rw-r--r--  arch/x86/kernel/cpu/amd.c                              54
-rw-r--r--  arch/x86/kernel/cpu/common.c                          263
-rwxr-xr-x  arch/x86/kernel/cpu/cpu_debug.c                       785
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c              2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/e_powersaver.c              6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c        6
-rw-r--r--  arch/x86/kernel/cpu/intel.c                            42
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c                  63
-rw-r--r--  arch/x86/kernel/cpu/mcheck/Makefile                     1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_32.c                    14
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_64.c                   530
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c                43
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel_64.c             214
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p4.c                         4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/threshold.c                 29
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c                  2
-rw-r--r--  arch/x86/kernel/cpu/proc.c                             20
19 files changed, 1739 insertions, 395 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82db7f45e2d..d4356f8b752 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -14,6 +14,8 @@ obj-y += vmware.o hypervisor.o
 obj-$(CONFIG_X86_32)		+= bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)		+= bugs_64.o
 
+obj-$(CONFIG_X86_CPU_DEBUG)	+= cpu_debug.o
+
 obj-$(CONFIG_CPU_SUP_INTEL)	+= intel.o
 obj-$(CONFIG_CPU_SUP_AMD)	+= amd.o
 obj-$(CONFIG_CPU_SUP_CYRIX_32)	+= cyrix.o
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 2cf23634b6d..6882a735d9c 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -7,7 +7,7 @@
 #include <asm/pat.h>
 #include <asm/processor.h>
 
-#include <mach_apic.h>
+#include <asm/apic.h>
 
 struct cpuid_bit {
 	u16 feature;
@@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
  */
 void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_X86_SMP
+#ifdef CONFIG_SMP
 	unsigned int eax, ebx, ecx, edx, sub_index;
 	unsigned int ht_mask_width, core_plus_mask_width;
 	unsigned int core_select_mask, core_level_siblings;
@@ -116,22 +116,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
 
 	core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
 
-#ifdef CONFIG_X86_32
-	c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
+	c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
 						& core_select_mask;
-	c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
+	c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
 	/*
 	 * Reinit the apicid, now that we have extended initial_apicid.
 	 */
-	c->apicid = phys_pkg_id(c->initial_apicid, 0);
-#else
-	c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
-	c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
-	/*
-	 * Reinit the apicid, now that we have extended initial_apicid.
-	 */
-	c->apicid = phys_pkg_id(0);
-#endif
+	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
+
 	c->x86_max_cores = (core_level_siblings / smp_num_siblings);
 
 
@@ -143,37 +135,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
 	return;
 #endif
 }
-
-#ifdef CONFIG_X86_PAT
-void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
-{
-	if (!cpu_has_pat)
-		pat_disable("PAT not supported by CPU.");
-
-	switch (c->x86_vendor) {
-	case X86_VENDOR_INTEL:
-		/*
-		 * There is a known erratum on Pentium III and Core Solo
-		 * and Core Duo CPUs.
-		 * " Page with PAT set to WC while associated MTRR is UC
-		 * may consolidate to UC "
-		 * Because of this erratum, it is better to stick with
-		 * setting WC in MTRR rather than using PAT on these CPUs.
-		 *
-		 * Enable PAT WC only on P4, Core 2 or later CPUs.
-		 */
-		if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
-			return;
-
-		pat_disable("PAT WC disabled due to known CPU erratum.");
-		return;
-
-	case X86_VENDOR_AMD:
-	case X86_VENDOR_CENTAUR:
-	case X86_VENDOR_TRANSMETA:
-		return;
-	}
-
-	pat_disable("PAT disabled. Not yet verified on this CPU type.");
-}
-#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c878f6aa91..f47df59016c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
 #include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
+#include <asm/cpu.h>
 
 #ifdef CONFIG_X86_64
 # include <asm/numa_64.h>
@@ -12,8 +13,6 @@
 # include <asm/cacheflush.h>
 #endif
 
-#include <mach_apic.h>
-
 #include "cpu.h"
 
 #ifdef CONFIG_X86_32
@@ -143,6 +142,55 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
 	}
 }
 
+static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+	/* calling is from identify_secondary_cpu() ? */
+	if (c->cpu_index == boot_cpu_id)
+		return;
+
+	/*
+	 * Certain Athlons might work (for various values of 'work') in SMP
+	 * but they are not certified as MP capable.
+	 */
+	/* Athlon 660/661 is valid. */
+	if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
+	    (c->x86_mask == 1)))
+		goto valid_k7;
+
+	/* Duron 670 is valid */
+	if ((c->x86_model == 7) && (c->x86_mask == 0))
+		goto valid_k7;
+
+	/*
+	 * Athlon 662, Duron 671, and Athlon >model 7 have capability
+	 * bit. It's worth noting that the A5 stepping (662) of some
+	 * Athlon XP's have the MP bit set.
+	 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
+	 * more.
+	 */
+	if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
+	    ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
+	     (c->x86_model > 7))
+		if (cpu_has_mp)
+			goto valid_k7;
+
+	/* If we get here, not a certified SMP capable AMD system. */
+
+	/*
+	 * Don't taint if we are running SMP kernel on a single non-MP
+	 * approved Athlon
+	 */
+	WARN_ONCE(1, "WARNING: This combination of AMD"
+		"processors is not suitable for SMP.\n");
+	if (!test_taint(TAINT_UNSAFE_SMP))
+		add_taint(TAINT_UNSAFE_SMP);
+
+valid_k7:
+	;
+#endif
+}
+
 static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -177,6 +225,8 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
 	}
 
 	set_cpu_cap(c, X86_FEATURE_K7);
+
+	amd_k7_smp_check(c);
 }
 #endif
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 83492b1f93b..f8869978bbb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -21,14 +21,14 @@
21#include <asm/asm.h> 21#include <asm/asm.h>
22#include <asm/numa.h> 22#include <asm/numa.h>
23#include <asm/smp.h> 23#include <asm/smp.h>
24#ifdef CONFIG_X86_LOCAL_APIC 24#include <asm/cpu.h>
25#include <asm/mpspec.h> 25#include <asm/cpumask.h>
26#include <asm/apic.h> 26#include <asm/apic.h>
27#include <mach_apic.h> 27
28#include <asm/genapic.h> 28#ifdef CONFIG_X86_LOCAL_APIC
29#include <asm/uv/uv.h>
29#endif 30#endif
30 31
31#include <asm/pda.h>
32#include <asm/pgtable.h> 32#include <asm/pgtable.h>
33#include <asm/processor.h> 33#include <asm/processor.h>
34#include <asm/desc.h> 34#include <asm/desc.h>
@@ -37,6 +37,7 @@
37#include <asm/sections.h> 37#include <asm/sections.h>
38#include <asm/setup.h> 38#include <asm/setup.h>
39#include <asm/hypervisor.h> 39#include <asm/hypervisor.h>
40#include <asm/stackprotector.h>
40 41
41#include "cpu.h" 42#include "cpu.h"
42 43
@@ -50,6 +51,15 @@ cpumask_var_t cpu_initialized_mask;
50/* representing cpus for which sibling maps can be computed */ 51/* representing cpus for which sibling maps can be computed */
51cpumask_var_t cpu_sibling_setup_mask; 52cpumask_var_t cpu_sibling_setup_mask;
52 53
54/* correctly size the local cpu masks */
55void __init setup_cpu_local_masks(void)
56{
57 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
58 alloc_bootmem_cpumask_var(&cpu_callin_mask);
59 alloc_bootmem_cpumask_var(&cpu_callout_mask);
60 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
61}
62
53#else /* CONFIG_X86_32 */ 63#else /* CONFIG_X86_32 */
54 64
55cpumask_t cpu_callin_map; 65cpumask_t cpu_callin_map;
@@ -62,23 +72,23 @@ cpumask_t cpu_sibling_setup_map;
62 72
63static struct cpu_dev *this_cpu __cpuinitdata; 73static struct cpu_dev *this_cpu __cpuinitdata;
64 74
75DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 76#ifdef CONFIG_X86_64
66/* We need valid kernel segments for data and code in long mode too 77 /*
67 * IRET will check the segment types kkeil 2000/10/28 78 * We need valid kernel segments for data and code in long mode too
68 * Also sysret mandates a special GDT layout 79 * IRET will check the segment types kkeil 2000/10/28
69 */ 80 * Also sysret mandates a special GDT layout
70/* The TLS descriptors are currently at a different place compared to i386. 81 *
71 Hopefully nobody expects them at a fixed place (Wine?) */ 82 * The TLS descriptors are currently at a different place compared to i386.
72DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { 83 * Hopefully nobody expects them at a fixed place (Wine?)
84 */
73 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, 85 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
74 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, 86 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
75 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, 87 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
76 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, 88 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
77 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, 89 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
78 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, 90 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
79} };
80#else 91#else
81DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
82 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 92 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
83 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 93 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
84 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 94 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -110,9 +120,10 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
110 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 120 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
111 121
112 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 122 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
113 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, 123 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
114} }; 124 GDT_STACK_CANARY_INIT
115#endif 125#endif
126} };
116EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 127EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
117 128
118#ifdef CONFIG_X86_32 129#ifdef CONFIG_X86_32
@@ -213,6 +224,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
213#endif 224#endif
214 225
215/* 226/*
227 * Some CPU features depend on higher CPUID levels, which may not always
228 * be available due to CPUID level capping or broken virtualization
229 * software. Add those features to this table to auto-disable them.
230 */
231struct cpuid_dependent_feature {
232 u32 feature;
233 u32 level;
234};
235static const struct cpuid_dependent_feature __cpuinitconst
236cpuid_dependent_features[] = {
237 { X86_FEATURE_MWAIT, 0x00000005 },
238 { X86_FEATURE_DCA, 0x00000009 },
239 { X86_FEATURE_XSAVE, 0x0000000d },
240 { 0, 0 }
241};
242
243static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
244{
245 const struct cpuid_dependent_feature *df;
246 for (df = cpuid_dependent_features; df->feature; df++) {
247 /*
248 * Note: cpuid_level is set to -1 if unavailable, but
249 * extended_extended_level is set to 0 if unavailable
250 * and the legitimate extended levels are all negative
251 * when signed; hence the weird messing around with
252 * signs here...
253 */
254 if (cpu_has(c, df->feature) &&
255 ((s32)df->level < 0 ?
256 (u32)df->level > (u32)c->extended_cpuid_level :
257 (s32)df->level > (s32)c->cpuid_level)) {
258 clear_cpu_cap(c, df->feature);
259 if (warn)
260 printk(KERN_WARNING
261 "CPU: CPU feature %s disabled "
262 "due to lack of CPUID level 0x%x\n",
263 x86_cap_flags[df->feature],
264 df->level);
265 }
266 }
267}
268
269/*
216 * Naming convention should be: <Name> [(<Codename>)] 270 * Naming convention should be: <Name> [(<Codename>)]
217 * This table only is used unless init_<vendor>() below doesn't set it; 271 * This table only is used unless init_<vendor>() below doesn't set it;
218 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used 272 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
@@ -242,18 +296,29 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
242 296
243__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; 297__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
244 298
299void load_percpu_segment(int cpu)
300{
301#ifdef CONFIG_X86_32
302 loadsegment(fs, __KERNEL_PERCPU);
303#else
304 loadsegment(gs, 0);
305 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
306#endif
307 load_stack_canary_segment();
308}
309
245/* Current gdt points %fs at the "master" per-cpu area: after this, 310/* Current gdt points %fs at the "master" per-cpu area: after this,
246 * it's on the real one. */ 311 * it's on the real one. */
247void switch_to_new_gdt(void) 312void switch_to_new_gdt(int cpu)
248{ 313{
249 struct desc_ptr gdt_descr; 314 struct desc_ptr gdt_descr;
250 315
251 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); 316 gdt_descr.address = (long)get_cpu_gdt_table(cpu);
252 gdt_descr.size = GDT_SIZE - 1; 317 gdt_descr.size = GDT_SIZE - 1;
253 load_gdt(&gdt_descr); 318 load_gdt(&gdt_descr);
254#ifdef CONFIG_X86_32 319 /* Reload the per-cpu base */
255 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); 320
256#endif 321 load_percpu_segment(cpu);
257} 322}
258 323
259static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; 324static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
@@ -383,11 +448,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
383 } 448 }
384 449
385 index_msb = get_count_order(smp_num_siblings); 450 index_msb = get_count_order(smp_num_siblings);
386#ifdef CONFIG_X86_64 451 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
387 c->phys_proc_id = phys_pkg_id(index_msb);
388#else
389 c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
390#endif
391 452
392 smp_num_siblings = smp_num_siblings / c->x86_max_cores; 453 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
393 454
@@ -395,13 +456,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
395 456
396 core_bits = get_count_order(c->x86_max_cores); 457 core_bits = get_count_order(c->x86_max_cores);
397 458
398#ifdef CONFIG_X86_64 459 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
399 c->cpu_core_id = phys_pkg_id(index_msb) &
400 ((1 << core_bits) - 1); 460 ((1 << core_bits) - 1);
401#else
402 c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
403 ((1 << core_bits) - 1);
404#endif
405 } 461 }
406 462
407out: 463out:
@@ -570,11 +626,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
570 if (this_cpu->c_early_init) 626 if (this_cpu->c_early_init)
571 this_cpu->c_early_init(c); 627 this_cpu->c_early_init(c);
572 628
573 validate_pat_support(c);
574
575#ifdef CONFIG_SMP 629#ifdef CONFIG_SMP
576 c->cpu_index = boot_cpu_id; 630 c->cpu_index = boot_cpu_id;
577#endif 631#endif
632 filter_cpuid_features(c, false);
578} 633}
579 634
580void __init early_cpu_init(void) 635void __init early_cpu_init(void)
@@ -637,7 +692,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
637 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; 692 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
638#ifdef CONFIG_X86_32 693#ifdef CONFIG_X86_32
639# ifdef CONFIG_X86_HT 694# ifdef CONFIG_X86_HT
640 c->apicid = phys_pkg_id(c->initial_apicid, 0); 695 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
641# else 696# else
642 c->apicid = c->initial_apicid; 697 c->apicid = c->initial_apicid;
643# endif 698# endif
@@ -684,7 +739,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
684 this_cpu->c_identify(c); 739 this_cpu->c_identify(c);
685 740
686#ifdef CONFIG_X86_64 741#ifdef CONFIG_X86_64
687 c->apicid = phys_pkg_id(0); 742 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
688#endif 743#endif
689 744
690 /* 745 /*
@@ -708,6 +763,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
708 * we do "generic changes." 763 * we do "generic changes."
709 */ 764 */
710 765
766 /* Filter out anything that depends on CPUID levels we don't have */
767 filter_cpuid_features(c, true);
768
711 /* If the model name is still unset, do table lookup. */ 769 /* If the model name is still unset, do table lookup. */
712 if (!c->x86_model_id[0]) { 770 if (!c->x86_model_id[0]) {
713 char *p; 771 char *p;
@@ -877,54 +935,22 @@ static __init int setup_disablecpuid(char *arg)
877__setup("clearcpuid=", setup_disablecpuid); 935__setup("clearcpuid=", setup_disablecpuid);
878 936
879#ifdef CONFIG_X86_64 937#ifdef CONFIG_X86_64
880struct x8664_pda **_cpu_pda __read_mostly;
881EXPORT_SYMBOL(_cpu_pda);
882
883struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 938struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
884 939
885static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; 940DEFINE_PER_CPU_FIRST(union irq_stack_union,
941 irq_stack_union) __aligned(PAGE_SIZE);
942DEFINE_PER_CPU(char *, irq_stack_ptr) =
943 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
886 944
887void __cpuinit pda_init(int cpu) 945DEFINE_PER_CPU(unsigned long, kernel_stack) =
888{ 946 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
889 struct x8664_pda *pda = cpu_pda(cpu); 947EXPORT_PER_CPU_SYMBOL(kernel_stack);
890 948
891 /* Setup up data that may be needed in __get_free_pages early */ 949DEFINE_PER_CPU(unsigned int, irq_count) = -1;
892 loadsegment(fs, 0);
893 loadsegment(gs, 0);
894 /* Memory clobbers used to order PDA accessed */
895 mb();
896 wrmsrl(MSR_GS_BASE, pda);
897 mb();
898
899 pda->cpunumber = cpu;
900 pda->irqcount = -1;
901 pda->kernelstack = (unsigned long)stack_thread_info() -
902 PDA_STACKOFFSET + THREAD_SIZE;
903 pda->active_mm = &init_mm;
904 pda->mmu_state = 0;
905
906 if (cpu == 0) {
907 /* others are initialized in smpboot.c */
908 pda->pcurrent = &init_task;
909 pda->irqstackptr = boot_cpu_stack;
910 pda->irqstackptr += IRQSTACKSIZE - 64;
911 } else {
912 if (!pda->irqstackptr) {
913 pda->irqstackptr = (char *)
914 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
915 if (!pda->irqstackptr)
916 panic("cannot allocate irqstack for cpu %d",
917 cpu);
918 pda->irqstackptr += IRQSTACKSIZE - 64;
919 }
920 950
921 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) 951static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
922 pda->nodenumber = cpu_to_node(cpu); 952 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
923 } 953 __aligned(PAGE_SIZE);
924}
925
926static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
927 DEBUG_STKSZ] __page_aligned_bss;
928 954
929extern asmlinkage void ignore_sysret(void); 955extern asmlinkage void ignore_sysret(void);
930 956
@@ -957,16 +983,21 @@ unsigned long kernel_eflags;
957 */ 983 */
958DEFINE_PER_CPU(struct orig_ist, orig_ist); 984DEFINE_PER_CPU(struct orig_ist, orig_ist);
959 985
960#else 986#else /* x86_64 */
961 987
962/* Make sure %fs is initialized properly in idle threads */ 988#ifdef CONFIG_CC_STACKPROTECTOR
989DEFINE_PER_CPU(unsigned long, stack_canary);
990#endif
991
992/* Make sure %fs and %gs are initialized properly in idle threads */
963struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) 993struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
964{ 994{
965 memset(regs, 0, sizeof(struct pt_regs)); 995 memset(regs, 0, sizeof(struct pt_regs));
966 regs->fs = __KERNEL_PERCPU; 996 regs->fs = __KERNEL_PERCPU;
997 regs->gs = __KERNEL_STACK_CANARY;
967 return regs; 998 return regs;
968} 999}
969#endif 1000#endif /* x86_64 */
970 1001
971/* 1002/*
972 * cpu_init() initializes state that is per-CPU. Some data is already 1003 * cpu_init() initializes state that is per-CPU. Some data is already
@@ -982,15 +1013,14 @@ void __cpuinit cpu_init(void)
982 struct tss_struct *t = &per_cpu(init_tss, cpu); 1013 struct tss_struct *t = &per_cpu(init_tss, cpu);
983 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); 1014 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
984 unsigned long v; 1015 unsigned long v;
985 char *estacks = NULL;
986 struct task_struct *me; 1016 struct task_struct *me;
987 int i; 1017 int i;
988 1018
989 /* CPU 0 is initialised in head64.c */ 1019#ifdef CONFIG_NUMA
990 if (cpu != 0) 1020 if (cpu != 0 && percpu_read(node_number) == 0 &&
991 pda_init(cpu); 1021 cpu_to_node(cpu) != NUMA_NO_NODE)
992 else 1022 percpu_write(node_number, cpu_to_node(cpu));
993 estacks = boot_exception_stacks; 1023#endif
994 1024
995 me = current; 1025 me = current;
996 1026
@@ -1006,7 +1036,9 @@ void __cpuinit cpu_init(void)
1006 * and set up the GDT descriptor: 1036 * and set up the GDT descriptor:
1007 */ 1037 */
1008 1038
1009 switch_to_new_gdt(); 1039 switch_to_new_gdt(cpu);
1040 loadsegment(fs, 0);
1041
1010 load_idt((const struct desc_ptr *)&idt_descr); 1042 load_idt((const struct desc_ptr *)&idt_descr);
1011 1043
1012 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); 1044 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
@@ -1017,25 +1049,20 @@ void __cpuinit cpu_init(void)
1017 barrier(); 1049 barrier();
1018 1050
1019 check_efer(); 1051 check_efer();
1020 if (cpu != 0 && x2apic) 1052 if (cpu != 0)
1021 enable_x2apic(); 1053 enable_x2apic();
1022 1054
1023 /* 1055 /*
1024 * set up and load the per-CPU TSS 1056 * set up and load the per-CPU TSS
1025 */ 1057 */
1026 if (!orig_ist->ist[0]) { 1058 if (!orig_ist->ist[0]) {
1027 static const unsigned int order[N_EXCEPTION_STACKS] = { 1059 static const unsigned int sizes[N_EXCEPTION_STACKS] = {
1028 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, 1060 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1029 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER 1061 [DEBUG_STACK - 1] = DEBUG_STKSZ
1030 }; 1062 };
1063 char *estacks = per_cpu(exception_stacks, cpu);
1031 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1064 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1032 if (cpu) { 1065 estacks += sizes[v];
1033 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
1034 if (!estacks)
1035 panic("Cannot allocate exception "
1036 "stack %ld %d\n", v, cpu);
1037 }
1038 estacks += PAGE_SIZE << order[v];
1039 orig_ist->ist[v] = t->x86_tss.ist[v] = 1066 orig_ist->ist[v] = t->x86_tss.ist[v] =
1040 (unsigned long)estacks; 1067 (unsigned long)estacks;
1041 } 1068 }
@@ -1051,8 +1078,7 @@ void __cpuinit cpu_init(void)
1051 1078
1052 atomic_inc(&init_mm.mm_count); 1079 atomic_inc(&init_mm.mm_count);
1053 me->active_mm = &init_mm; 1080 me->active_mm = &init_mm;
1054 if (me->mm) 1081 BUG_ON(me->mm);
1055 BUG();
1056 enter_lazy_tlb(&init_mm, me); 1082 enter_lazy_tlb(&init_mm, me);
1057 1083
1058 load_sp0(t, &current->thread); 1084 load_sp0(t, &current->thread);
@@ -1069,22 +1095,19 @@ void __cpuinit cpu_init(void)
1069 */ 1095 */
1070 if (kgdb_connected && arch_kgdb_ops.correct_hw_break) 1096 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
1071 arch_kgdb_ops.correct_hw_break(); 1097 arch_kgdb_ops.correct_hw_break();
1072 else { 1098 else
1073#endif 1099#endif
1074 /* 1100 {
1075 * Clear all 6 debug registers: 1101 /*
1076 */ 1102 * Clear all 6 debug registers:
1077 1103 */
1078 set_debugreg(0UL, 0); 1104 set_debugreg(0UL, 0);
1079 set_debugreg(0UL, 1); 1105 set_debugreg(0UL, 1);
1080 set_debugreg(0UL, 2); 1106 set_debugreg(0UL, 2);
1081 set_debugreg(0UL, 3); 1107 set_debugreg(0UL, 3);
1082 set_debugreg(0UL, 6); 1108 set_debugreg(0UL, 6);
1083 set_debugreg(0UL, 7); 1109 set_debugreg(0UL, 7);
1084#ifdef CONFIG_KGDB
1085 /* If the kgdb is connected no debug regs should be altered. */
1086 } 1110 }
1087#endif
1088 1111
1089 fpu_init(); 1112 fpu_init();
1090 1113
@@ -1114,15 +1137,14 @@ void __cpuinit cpu_init(void)
1114 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1137 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1115 1138
1116 load_idt(&idt_descr); 1139 load_idt(&idt_descr);
1117 switch_to_new_gdt(); 1140 switch_to_new_gdt(cpu);
1118 1141
1119 /* 1142 /*
1120 * Set up and load the per-CPU TSS and LDT 1143 * Set up and load the per-CPU TSS and LDT
1121 */ 1144 */
1122 atomic_inc(&init_mm.mm_count); 1145 atomic_inc(&init_mm.mm_count);
1123 curr->active_mm = &init_mm; 1146 curr->active_mm = &init_mm;
1124 if (curr->mm) 1147 BUG_ON(curr->mm);
1125 BUG();
1126 enter_lazy_tlb(&init_mm, curr); 1148 enter_lazy_tlb(&init_mm, curr);
1127 1149
1128 load_sp0(t, thread); 1150 load_sp0(t, thread);
@@ -1135,9 +1157,6 @@ void __cpuinit cpu_init(void)
1135 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 1157 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
1136#endif 1158#endif
1137 1159
1138 /* Clear %gs. */
1139 asm volatile ("mov %0, %%gs" : : "r" (0));
1140
1141 /* Clear all 6 debug registers: */ 1160 /* Clear all 6 debug registers: */
1142 set_debugreg(0, 0); 1161 set_debugreg(0, 0);
1143 set_debugreg(0, 1); 1162 set_debugreg(0, 1);
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
new file mode 100755
index 00000000000..9abbcbd933c
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -0,0 +1,785 @@
1/*
2 * CPU x86 architecture debug code
3 *
4 * Copyright(C) 2009 Jaswinder Singh Rajput
5 *
6 * For licencing details see kernel-base/COPYING
7 */
8
9#include <linux/interrupt.h>
10#include <linux/compiler.h>
11#include <linux/seq_file.h>
12#include <linux/debugfs.h>
13#include <linux/kprobes.h>
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/percpu.h>
17#include <linux/signal.h>
18#include <linux/errno.h>
19#include <linux/sched.h>
20#include <linux/types.h>
21#include <linux/init.h>
22#include <linux/slab.h>
23#include <linux/smp.h>
24
25#include <asm/cpu_debug.h>
26#include <asm/paravirt.h>
27#include <asm/system.h>
28#include <asm/traps.h>
29#include <asm/apic.h>
30#include <asm/desc.h>
31
32static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
33static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
34static DEFINE_PER_CPU(unsigned, cpu_modelflag);
35static DEFINE_PER_CPU(int, cpu_priv_count);
36static DEFINE_PER_CPU(unsigned, cpu_model);
37
38static DEFINE_MUTEX(cpu_debug_lock);
39
40static struct dentry *cpu_debugfs_dir;
41
42static struct cpu_debug_base cpu_base[] = {
43 { "mc", CPU_MC }, /* Machine Check */
44 { "monitor", CPU_MONITOR }, /* Monitor */
45 { "time", CPU_TIME }, /* Time */
46 { "pmc", CPU_PMC }, /* Performance Monitor */
47 { "platform", CPU_PLATFORM }, /* Platform */
48 { "apic", CPU_APIC }, /* APIC */
49 { "poweron", CPU_POWERON }, /* Power-on */
50 { "control", CPU_CONTROL }, /* Control */
51 { "features", CPU_FEATURES }, /* Features control */
52 { "lastbranch", CPU_LBRANCH }, /* Last Branch */
53 { "bios", CPU_BIOS }, /* BIOS */
54 { "freq", CPU_FREQ }, /* Frequency */
55 { "mtrr", CPU_MTRR }, /* MTRR */
56 { "perf", CPU_PERF }, /* Performance */
57 { "cache", CPU_CACHE }, /* Cache */
58 { "sysenter", CPU_SYSENTER }, /* Sysenter */
59 { "therm", CPU_THERM }, /* Thermal */
60 { "misc", CPU_MISC }, /* Miscellaneous */
61 { "debug", CPU_DEBUG }, /* Debug */
62 { "pat", CPU_PAT }, /* PAT */
63 { "vmx", CPU_VMX }, /* VMX */
64 { "call", CPU_CALL }, /* System Call */
65 { "base", CPU_BASE }, /* BASE Address */
66 { "smm", CPU_SMM }, /* System mgmt mode */
67 { "svm", CPU_SVM }, /*Secure Virtial Machine*/
68 { "osvm", CPU_OSVM }, /* OS-Visible Workaround*/
69 { "tss", CPU_TSS }, /* Task Stack Segment */
70 { "cr", CPU_CR }, /* Control Registers */
71 { "dt", CPU_DT }, /* Descriptor Table */
72 { "registers", CPU_REG_ALL }, /* Select all Registers */
73};
74
75static struct cpu_file_base cpu_file[] = {
76 { "index", CPU_REG_ALL }, /* index */
77 { "value", CPU_REG_ALL }, /* value */
78};
79
80/* Intel Registers Range */
81static struct cpu_debug_range cpu_intel_range[] = {
82 { 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL },
83 { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE },
84 { 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL },
85 { 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM },
86 { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE },
87 { 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE },
88
89 { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE },
90 { 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON },
91 { 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON },
92 { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE },
93
94 { 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE },
95 { 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT },
96 { 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT },
97 { 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM },
98
99 { 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE },
100 { 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 },
101 { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE },
102 { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON },
103
104 { 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT },
105 { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT },
106 { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT },
107 { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE },
108
109 { 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 },
110 { 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 },
111 { 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX },
112 { 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 },
113 { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT },
114
115 { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE },
116 { 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE },
117 { 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE },
118 { 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT },
119 { 0x00000198, 0x00000199, CPU_PERF, CPU_PM_CX_AT_XE },
120 { 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE },
121 { 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE },
122 { 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE },
123
124 { 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT },
125 { 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON },
126 { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE },
127 { 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON },
128 { 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE },
129 { 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 },
130 { 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE },
131 { 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 },
132
133 { 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE },
134 { 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE },
135 { 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE },
136 { 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE },
137 { 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE },
138 { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE },
139
140 { 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON },
141 { 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE },
142 { 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON },
143 { 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT },
144 { 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON },
145 { 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT },
146 { 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON },
147 { 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON },
148 { 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON },
149 { 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON },
150 { 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE },
151 { 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON },
152
153 { 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE },
154 { 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON },
155 { 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE },
156 { 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON },
157 { 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE },
158 { 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON },
159 { 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE },
160 { 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON },
161 { 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE },
162 { 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE },
163 { 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE },
164
165 { 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE },
166 { 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON },
167 { 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON },
168
169 { 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP },
170
171 { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON },
172 { 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON },
173 { 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON },
174 { 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON },
175};
176
177/* AMD Registers Range */
178static struct cpu_debug_range cpu_amd_range[] = {
179 { 0x00000010, 0x00000010, CPU_TIME, CPU_ALL, },
180 { 0x0000001B, 0x0000001B, CPU_APIC, CPU_ALL, },
181 { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_ALL, },
182
183 { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_ALL, },
184 { 0x00000179, 0x0000017A, CPU_MC, CPU_ALL, },
185 { 0x0000017B, 0x0000017B, CPU_MC, CPU_ALL, },
186 { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_ALL, },
187 { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_ALL, },
188
189 { 0x00000200, 0x0000020F, CPU_MTRR, CPU_ALL, },
190 { 0x00000250, 0x00000250, CPU_MTRR, CPU_ALL, },
191 { 0x00000258, 0x00000259, CPU_MTRR, CPU_ALL, },
192 { 0x00000268, 0x0000026F, CPU_MTRR, CPU_ALL, },
193 { 0x00000277, 0x00000277, CPU_PAT, CPU_ALL, },
194 { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_ALL, },
195
196 { 0x00000400, 0x00000417, CPU_MC, CPU_ALL, },
197
198 { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_ALL, },
199 { 0xC0000081, 0xC0000084, CPU_CALL, CPU_ALL, },
200 { 0xC0000100, 0xC0000102, CPU_BASE, CPU_ALL, },
201 { 0xC0000103, 0xC0000103, CPU_TIME, CPU_ALL, },
202
203 { 0xC0000408, 0xC000040A, CPU_MC, CPU_ALL, },
204
205 { 0xc0010000, 0xc0010007, CPU_PMC, CPU_ALL, },
206 { 0xc0010010, 0xc0010010, CPU_MTRR, CPU_ALL, },
207 { 0xc0010016, 0xc001001A, CPU_MTRR, CPU_ALL, },
208 { 0xc001001D, 0xc001001D, CPU_MTRR, CPU_ALL, },
209 { 0xc0010030, 0xc0010035, CPU_BIOS, CPU_ALL, },
210 { 0xc0010056, 0xc0010056, CPU_SMM, CPU_ALL, },
211 { 0xc0010061, 0xc0010063, CPU_SMM, CPU_ALL, },
212 { 0xc0010074, 0xc0010074, CPU_MC, CPU_ALL, },
213 { 0xc0010111, 0xc0010113, CPU_SMM, CPU_ALL, },
214 { 0xc0010114, 0xc0010118, CPU_SVM, CPU_ALL, },
215 { 0xc0010119, 0xc001011A, CPU_SMM, CPU_ALL, },
216 { 0xc0010140, 0xc0010141, CPU_OSVM, CPU_ALL, },
217 { 0xc0010156, 0xc0010156, CPU_SMM, CPU_ALL, },
218};
219
220
221static int get_cpu_modelflag(unsigned cpu)
222{
223 int flag;
224
225 switch (per_cpu(cpu_model, cpu)) {
226 /* Intel */
227 case 0x0501:
228 case 0x0502:
229 case 0x0504:
230 flag = CPU_INTEL_PENTIUM;
231 break;
232 case 0x0601:
233 case 0x0603:
234 case 0x0605:
235 case 0x0607:
236 case 0x0608:
237 case 0x060A:
238 case 0x060B:
239 flag = CPU_INTEL_P6;
240 break;
241 case 0x0609:
242 case 0x060D:
243 flag = CPU_INTEL_PENTIUM_M;
244 break;
245 case 0x060E:
246 flag = CPU_INTEL_CORE;
247 break;
248 case 0x060F:
249 case 0x0617:
250 flag = CPU_INTEL_CORE2;
251 break;
252 case 0x061C:
253 flag = CPU_INTEL_ATOM;
254 break;
255 case 0x0F00:
256 case 0x0F01:
257 case 0x0F02:
258 case 0x0F03:
259 case 0x0F04:
260 flag = CPU_INTEL_XEON_P4;
261 break;
262 case 0x0F06:
263 flag = CPU_INTEL_XEON_MP;
264 break;
265 default:
266 flag = CPU_NONE;
267 break;
268 }
269
270 return flag;
271}
272
273static int get_cpu_range_count(unsigned cpu)
274{
275 int index;
276
277 switch (per_cpu(cpu_model, cpu) >> 16) {
278 case X86_VENDOR_INTEL:
279 index = ARRAY_SIZE(cpu_intel_range);
280 break;
281 case X86_VENDOR_AMD:
282 index = ARRAY_SIZE(cpu_amd_range);
283 break;
284 default:
285 index = 0;
286 break;
287 }
288
289 return index;
290}
291
292static int is_typeflag_valid(unsigned cpu, unsigned flag)
293{
294 unsigned vendor, modelflag;
295 int i, index;
296
297 /* Standard Registers should be always valid */
298 if (flag >= CPU_TSS)
299 return 1;
300
301 modelflag = per_cpu(cpu_modelflag, cpu);
302 vendor = per_cpu(cpu_model, cpu) >> 16;
303 index = get_cpu_range_count(cpu);
304
305 for (i = 0; i < index; i++) {
306 switch (vendor) {
307 case X86_VENDOR_INTEL:
308 if ((cpu_intel_range[i].model & modelflag) &&
309 (cpu_intel_range[i].flag & flag))
310 return 1;
311 break;
312 case X86_VENDOR_AMD:
313 if (cpu_amd_range[i].flag & flag)
314 return 1;
315 break;
316 }
317 }
318
319 /* Invalid */
320 return 0;
321}
322
323static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
324 int index, unsigned flag)
325{
326 unsigned modelflag;
327
328 modelflag = per_cpu(cpu_modelflag, cpu);
329 *max = 0;
330 switch (per_cpu(cpu_model, cpu) >> 16) {
331 case X86_VENDOR_INTEL:
332 if ((cpu_intel_range[index].model & modelflag) &&
333 (cpu_intel_range[index].flag & flag)) {
334 *min = cpu_intel_range[index].min;
335 *max = cpu_intel_range[index].max;
336 }
337 break;
338 case X86_VENDOR_AMD:
339 if (cpu_amd_range[index].flag & flag) {
340 *min = cpu_amd_range[index].min;
341 *max = cpu_amd_range[index].max;
342 }
343 break;
344 }
345
346 return *max;
347}
348
349/* This function can also be called with seq = NULL for printk */
350static void print_cpu_data(struct seq_file *seq, unsigned type,
351 u32 low, u32 high)
352{
353 struct cpu_private *priv;
354 u64 val = high;
355
356 if (seq) {
357 priv = seq->private;
358 if (priv->file) {
359 val = (val << 32) | low;
360 seq_printf(seq, "0x%llx\n", val);
361 } else
362 seq_printf(seq, " %08x: %08x_%08x\n",
363 type, high, low);
364 } else
365 printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low);
366}
367
368/* This function can also be called with seq = NULL for printk */
369static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
370{
371 unsigned msr, msr_min, msr_max;
372 struct cpu_private *priv;
373 u32 low, high;
374 int i, range;
375
376 if (seq) {
377 priv = seq->private;
378 if (priv->file) {
379 if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg,
380 &low, &high))
381 print_cpu_data(seq, priv->reg, low, high);
382 return;
383 }
384 }
385
386 range = get_cpu_range_count(cpu);
387
388 for (i = 0; i < range; i++) {
389 if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
390 continue;
391
392 for (msr = msr_min; msr <= msr_max; msr++) {
393 if (rdmsr_safe_on_cpu(cpu, msr, &low, &high))
394 continue;
395 print_cpu_data(seq, msr, low, high);
396 }
397 }
398}
399
400static void print_tss(void *arg)
401{
402 struct pt_regs *regs = task_pt_regs(current);
403 struct seq_file *seq = arg;
404 unsigned int seg;
405
406 seq_printf(seq, " RAX\t: %016lx\n", regs->ax);
407 seq_printf(seq, " RBX\t: %016lx\n", regs->bx);
408 seq_printf(seq, " RCX\t: %016lx\n", regs->cx);
409 seq_printf(seq, " RDX\t: %016lx\n", regs->dx);
410
411 seq_printf(seq, " RSI\t: %016lx\n", regs->si);
412 seq_printf(seq, " RDI\t: %016lx\n", regs->di);
413 seq_printf(seq, " RBP\t: %016lx\n", regs->bp);
414 seq_printf(seq, " ESP\t: %016lx\n", regs->sp);
415
416#ifdef CONFIG_X86_64
417 seq_printf(seq, " R08\t: %016lx\n", regs->r8);
418 seq_printf(seq, " R09\t: %016lx\n", regs->r9);
419 seq_printf(seq, " R10\t: %016lx\n", regs->r10);
420 seq_printf(seq, " R11\t: %016lx\n", regs->r11);
421 seq_printf(seq, " R12\t: %016lx\n", regs->r12);
422 seq_printf(seq, " R13\t: %016lx\n", regs->r13);
423 seq_printf(seq, " R14\t: %016lx\n", regs->r14);
424 seq_printf(seq, " R15\t: %016lx\n", regs->r15);
425#endif
426
427 asm("movl %%cs,%0" : "=r" (seg));
428 seq_printf(seq, " CS\t: %04x\n", seg);
429 asm("movl %%ds,%0" : "=r" (seg));
430 seq_printf(seq, " DS\t: %04x\n", seg);
431 seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff);
432 asm("movl %%es,%0" : "=r" (seg));
433 seq_printf(seq, " ES\t: %04x\n", seg);
434 asm("movl %%fs,%0" : "=r" (seg));
435 seq_printf(seq, " FS\t: %04x\n", seg);
436 asm("movl %%gs,%0" : "=r" (seg));
437 seq_printf(seq, " GS\t: %04x\n", seg);
438
439 seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags);
440
441 seq_printf(seq, " EIP\t: %016lx\n", regs->ip);
442}
443
444static void print_cr(void *arg)
445{
446 struct seq_file *seq = arg;
447
448 seq_printf(seq, " cr0\t: %016lx\n", read_cr0());
449 seq_printf(seq, " cr2\t: %016lx\n", read_cr2());
450 seq_printf(seq, " cr3\t: %016lx\n", read_cr3());
451 seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe());
452#ifdef CONFIG_X86_64
453 seq_printf(seq, " cr8\t: %016lx\n", read_cr8());
454#endif
455}
456
457static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt)
458{
459 seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size));
460}
461
462static void print_dt(void *seq)
463{
464 struct desc_ptr dt;
465 unsigned long ldt;
466
467 /* IDT */
468 store_idt((struct desc_ptr *)&dt);
469 print_desc_ptr("IDT", seq, dt);
470
471 /* GDT */
472 store_gdt((struct desc_ptr *)&dt);
473 print_desc_ptr("GDT", seq, dt);
474
475 /* LDT */
476 store_ldt(ldt);
477 seq_printf(seq, " LDT\t: %016lx\n", ldt);
478
479 /* TR */
480 store_tr(ldt);
481 seq_printf(seq, " TR\t: %016lx\n", ldt);
482}
483
484static void print_dr(void *arg)
485{
486 struct seq_file *seq = arg;
487 unsigned long dr;
488 int i;
489
490 for (i = 0; i < 8; i++) {
491 /* Ignore db4, db5 */
492 if ((i == 4) || (i == 5))
493 continue;
494 get_debugreg(dr, i);
495 seq_printf(seq, " dr%d\t: %016lx\n", i, dr);
496 }
497
498 seq_printf(seq, "\n MSR\t:\n");
499}
500
501static void print_apic(void *arg)
502{
503 struct seq_file *seq = arg;
504
505#ifdef CONFIG_X86_LOCAL_APIC
506 seq_printf(seq, " LAPIC\t:\n");
507 seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24);
508 seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR));
509 seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI));
510 seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI));
511 seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI));
512 seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR));
513 seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR));
514 seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV));
515 seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR));
516 seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR));
517 seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR));
518 seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2));
519 seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT));
520 seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR));
521 seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC));
522 seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0));
523 seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1));
524 seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR));
525 seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
526 seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
527 seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
528#endif /* CONFIG_X86_LOCAL_APIC */
529
530 seq_printf(seq, "\n MSR\t:\n");
531}
532
533static int cpu_seq_show(struct seq_file *seq, void *v)
534{
535 struct cpu_private *priv = seq->private;
536
537 if (priv == NULL)
538 return -EINVAL;
539
540 switch (cpu_base[priv->type].flag) {
541 case CPU_TSS:
542 smp_call_function_single(priv->cpu, print_tss, seq, 1);
543 break;
544 case CPU_CR:
545 smp_call_function_single(priv->cpu, print_cr, seq, 1);
546 break;
547 case CPU_DT:
548 smp_call_function_single(priv->cpu, print_dt, seq, 1);
549 break;
550 case CPU_DEBUG:
551 if (priv->file == CPU_INDEX_BIT)
552 smp_call_function_single(priv->cpu, print_dr, seq, 1);
553 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
554 break;
555 case CPU_APIC:
556 if (priv->file == CPU_INDEX_BIT)
557 smp_call_function_single(priv->cpu, print_apic, seq, 1);
558 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
559 break;
560
561 default:
562 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
563 break;
564 }
565 seq_printf(seq, "\n");
566
567 return 0;
568}
569
570static void *cpu_seq_start(struct seq_file *seq, loff_t *pos)
571{
572 if (*pos == 0) /* One time is enough ;-) */
573 return seq;
574
575 return NULL;
576}
577
578static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
579{
580 (*pos)++;
581
582 return cpu_seq_start(seq, pos);
583}
584
585static void cpu_seq_stop(struct seq_file *seq, void *v)
586{
587}
588
589static const struct seq_operations cpu_seq_ops = {
590 .start = cpu_seq_start,
591 .next = cpu_seq_next,
592 .stop = cpu_seq_stop,
593 .show = cpu_seq_show,
594};
595
596static int cpu_seq_open(struct inode *inode, struct file *file)
597{
598 struct cpu_private *priv = inode->i_private;
599 struct seq_file *seq;
600 int err;
601
602 err = seq_open(file, &cpu_seq_ops);
603 if (!err) {
604 seq = file->private_data;
605 seq->private = priv;
606 }
607
608 return err;
609}
610
611static const struct file_operations cpu_fops = {
612 .open = cpu_seq_open,
613 .read = seq_read,
614 .llseek = seq_lseek,
615 .release = seq_release,
616};
617
618static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
619 unsigned file, struct dentry *dentry)
620{
621 struct cpu_private *priv = NULL;
622
623 /* Already intialized */
624 if (file == CPU_INDEX_BIT)
625 if (per_cpu(cpu_arr[type].init, cpu))
626 return 0;
627
628 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
629 if (priv == NULL)
630 return -ENOMEM;
631
632 priv->cpu = cpu;
633 priv->type = type;
634 priv->reg = reg;
635 priv->file = file;
636 mutex_lock(&cpu_debug_lock);
637 per_cpu(priv_arr[type], cpu) = priv;
638 per_cpu(cpu_priv_count, cpu)++;
639 mutex_unlock(&cpu_debug_lock);
640
641 if (file)
642 debugfs_create_file(cpu_file[file].name, S_IRUGO,
643 dentry, (void *)priv, &cpu_fops);
644 else {
645 debugfs_create_file(cpu_base[type].name, S_IRUGO,
646 per_cpu(cpu_arr[type].dentry, cpu),
647 (void *)priv, &cpu_fops);
648 mutex_lock(&cpu_debug_lock);
649 per_cpu(cpu_arr[type].init, cpu) = 1;
650 mutex_unlock(&cpu_debug_lock);
651 }
652
653 return 0;
654}
655
656static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg,
657 struct dentry *dentry)
658{
659 unsigned file;
660 int err = 0;
661
662 for (file = 0; file < ARRAY_SIZE(cpu_file); file++) {
663 err = cpu_create_file(cpu, type, reg, file, dentry);
664 if (err)
665 return err;
666 }
667
668 return err;
669}
670
671static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
672{
673 struct dentry *cpu_dentry = NULL;
674 unsigned reg, reg_min, reg_max;
675 int i, range, err = 0;
676 char reg_dir[12];
677 u32 low, high;
678
679 range = get_cpu_range_count(cpu);
680
681 for (i = 0; i < range; i++) {
682 if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
683 cpu_base[type].flag))
684 continue;
685
686 for (reg = reg_min; reg <= reg_max; reg++) {
687 if (rdmsr_safe_on_cpu(cpu, reg, &low, &high))
688 continue;
689
690 sprintf(reg_dir, "0x%x", reg);
691 cpu_dentry = debugfs_create_dir(reg_dir, dentry);
692 err = cpu_init_regfiles(cpu, type, reg, cpu_dentry);
693 if (err)
694 return err;
695 }
696 }
697
698 return err;
699}
700
701static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
702{
703 struct dentry *cpu_dentry = NULL;
704 unsigned type;
705 int err = 0;
706
707 for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) {
708 if (!is_typeflag_valid(cpu, cpu_base[type].flag))
709 continue;
710 cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
711 per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry;
712
713 if (type < CPU_TSS_BIT)
714 err = cpu_init_msr(cpu, type, cpu_dentry);
715 else
716 err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT,
717 cpu_dentry);
718 if (err)
719 return err;
720 }
721
722 return err;
723}
724
725static int cpu_init_cpu(void)
726{
727 struct dentry *cpu_dentry = NULL;
728 struct cpuinfo_x86 *cpui;
729 char cpu_dir[12];
730 unsigned cpu;
731 int err = 0;
732
733 for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
734 cpui = &cpu_data(cpu);
735 if (!cpu_has(cpui, X86_FEATURE_MSR))
736 continue;
737 per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) |
738 (cpui->x86 << 8) |
739 (cpui->x86_model));
740 per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu);
741
742 sprintf(cpu_dir, "cpu%d", cpu);
743 cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
744 err = cpu_init_allreg(cpu, cpu_dentry);
745
746 pr_info("cpu%d(%d) debug files %d\n",
747 cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu));
748 if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) {
749 pr_err("Register files count %d exceeds limit %d\n",
750 per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES);
751 per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES;
752 err = -ENFILE;
753 }
754 if (err)
755 return err;
756 }
757
758 return err;
759}
760
761static int __init cpu_debug_init(void)
762{
763 cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir);
764
765 return cpu_init_cpu();
766}
767
768static void __exit cpu_debug_exit(void)
769{
770 int i, cpu;
771
772 if (cpu_debugfs_dir)
773 debugfs_remove_recursive(cpu_debugfs_dir);
774
775 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
776 for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++)
777 kfree(per_cpu(priv_arr[i], cpu));
778}
779
780module_init(cpu_debug_init);
781module_exit(cpu_debug_exit);
782
783MODULE_AUTHOR("Jaswinder Singh Rajput");
784MODULE_DESCRIPTION("CPU Debug module");
785MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 4b1c319d30c..22590cf688a 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -601,7 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	if (!data)
 		return -ENOMEM;
 
-	data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
+	data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
 	per_cpu(drv_data, cpu) = data;
 
 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index c2f930d8664..41ab3f064cb 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -204,12 +204,12 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
 	}
 	/* Enable Enhanced PowerSaver */
 	rdmsrl(MSR_IA32_MISC_ENABLE, val);
-	if (!(val & 1 << 16)) {
-		val |= 1 << 16;
+	if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+		val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
 		wrmsrl(MSR_IA32_MISC_ENABLE, val);
 		/* Can be locked at 0 */
 		rdmsrl(MSR_IA32_MISC_ENABLE, val);
-		if (!(val & 1 << 16)) {
+		if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
 			printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
 			return -ENODEV;
 		}
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index f08998278a3..c9f1fdc0283 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -390,14 +390,14 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
 	   enable it if not. */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 
-	if (!(l & (1<<16))) {
-		l |= (1<<16);
+	if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
+		l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
 		dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
 		wrmsr(MSR_IA32_MISC_ENABLE, l, h);
 
 		/* check to see if it stuck */
 		rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-		if (!(l & (1<<16))) {
+		if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
 			printk(KERN_INFO PFX
 				"couldn't enable Enhanced SpeedStep\n");
 			return -ENODEV;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 24ff26a38ad..191117f1ad5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -13,6 +13,7 @@
 #include <asm/uaccess.h>
 #include <asm/ds.h>
 #include <asm/bugs.h>
+#include <asm/cpu.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/topology.h>
@@ -24,7 +25,6 @@
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/mpspec.h>
 #include <asm/apic.h>
-#include <mach_apic.h>
 #endif
 
 static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
@@ -63,6 +63,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
 	}
 
+	/*
+	 * There is a known erratum on Pentium III and Core Solo
+	 * and Core Duo CPUs.
+	 * " Page with PAT set to WC while associated MTRR is UC
+	 * may consolidate to UC "
+	 * Because of this erratum, it is better to stick with
+	 * setting WC in MTRR rather than using PAT on these CPUs.
+	 *
+	 * Enable PAT WC only on P4, Core 2 or later CPUs.
+	 */
+	if (c->x86 == 6 && c->x86_model < 15)
+		clear_cpu_cap(c, X86_FEATURE_PAT);
 }
 
 #ifdef CONFIG_X86_32
@@ -99,6 +111,28 @@ static void __cpuinit trap_init_f00f_bug(void)
99} 111}
100#endif 112#endif
101 113
114static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
115{
116#ifdef CONFIG_SMP
117 /* calling is from identify_secondary_cpu() ? */
118 if (c->cpu_index == boot_cpu_id)
119 return;
120
121 /*
122 * Mask B, Pentium, but not Pentium MMX
123 */
124 if (c->x86 == 5 &&
125 c->x86_mask >= 1 && c->x86_mask <= 4 &&
126 c->x86_model <= 3) {
127 /*
128 * Remember we have B step Pentia with bugs
129 */
130 WARN_ONCE(1, "WARNING: SMP operation may be unreliable"
131 "with B stepping processors.\n");
132 }
133#endif
134}
135
102static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) 136static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
103{ 137{
104 unsigned long lo, hi; 138 unsigned long lo, hi;
@@ -135,10 +169,10 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
135 */ 169 */
136 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { 170 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
137 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); 171 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
138 if ((lo & (1<<9)) == 0) { 172 if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) {
139 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); 173 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
140 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); 174 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
141 lo |= (1<<9); /* Disable hw prefetching */ 175 lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
142 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); 176 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
143 } 177 }
144 } 178 }
@@ -175,6 +209,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
175#ifdef CONFIG_X86_NUMAQ 209#ifdef CONFIG_X86_NUMAQ
176 numaq_tsc_disable(); 210 numaq_tsc_disable();
177#endif 211#endif
212
213 intel_smp_check(c);
178} 214}
179#else 215#else
180static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) 216static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
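The intel.c changes above gate two quirks on the CPU's family/model/stepping signature: PAT stays off on family 6 models below 15, and an SMP warning fires on B-step Pentiums (family 5, model <= 3, stepping 1 to 4). A small standalone sketch of that kind of signature gating, using an invented struct cpu_sig in place of struct cpuinfo_x86; it is illustrative, not the kernel's detection code.

#include <stdio.h>
#include <stdbool.h>

/* Illustrative stand-in for struct cpuinfo_x86's family/model/stepping fields. */
struct cpu_sig {
    unsigned family;
    unsigned model;
    unsigned stepping;
};

/* PAT erratum gate as in the hunk above: family 6, model < 15 (pre-Core 2). */
static bool needs_pat_disable(const struct cpu_sig *c)
{
    return c->family == 6 && c->model < 15;
}

/* B-step Pentium check: family 5, model <= 3, stepping 1..4. */
static bool is_b_step_pentium(const struct cpu_sig *c)
{
    return c->family == 5 && c->model <= 3 &&
           c->stepping >= 1 && c->stepping <= 4;
}

int main(void)
{
    struct cpu_sig core_duo = { 6, 14, 8 };   /* Yonah: PAT kept off */
    struct cpu_sig core2    = { 6, 15, 6 };   /* Merom: PAT allowed  */

    printf("Core Duo PAT disabled: %d\n", needs_pat_disable(&core_duo));
    printf("Core 2  PAT disabled: %d\n", needs_pat_disable(&core2));
    printf("B-step check on family-5 model-2 stepping-2: %d\n",
           is_b_step_pentium(&(struct cpu_sig){ 5, 2, 2 }));
    return 0;
}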
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index da299eb85fc..7293508d8f5 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -147,7 +147,16 @@ struct _cpuid4_info {
147 union _cpuid4_leaf_ecx ecx; 147 union _cpuid4_leaf_ecx ecx;
148 unsigned long size; 148 unsigned long size;
149 unsigned long can_disable; 149 unsigned long can_disable;
150 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ 150 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
151};
152
153/* subset of above _cpuid4_info w/o shared_cpu_map */
154struct _cpuid4_info_regs {
155 union _cpuid4_leaf_eax eax;
156 union _cpuid4_leaf_ebx ebx;
157 union _cpuid4_leaf_ecx ecx;
158 unsigned long size;
159 unsigned long can_disable;
151}; 160};
152 161
153#ifdef CONFIG_PCI 162#ifdef CONFIG_PCI
@@ -278,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
278} 287}
279 288
280static void __cpuinit 289static void __cpuinit
281amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) 290amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
282{ 291{
283 if (index < 3) 292 if (index < 3)
284 return; 293 return;
@@ -286,7 +295,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
286} 295}
287 296
288static int 297static int
289__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 298__cpuinit cpuid4_cache_lookup_regs(int index,
299 struct _cpuid4_info_regs *this_leaf)
290{ 300{
291 union _cpuid4_leaf_eax eax; 301 union _cpuid4_leaf_eax eax;
292 union _cpuid4_leaf_ebx ebx; 302 union _cpuid4_leaf_ebx ebx;
@@ -314,6 +324,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
314 return 0; 324 return 0;
315} 325}
316 326
327static int
328__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
329{
330 struct _cpuid4_info_regs *leaf_regs =
331 (struct _cpuid4_info_regs *)this_leaf;
332
333 return cpuid4_cache_lookup_regs(index, leaf_regs);
334}
335
317static int __cpuinit find_num_cache_leaves(void) 336static int __cpuinit find_num_cache_leaves(void)
318{ 337{
319 unsigned int eax, ebx, ecx, edx; 338 unsigned int eax, ebx, ecx, edx;
@@ -353,11 +372,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
353 * parameters cpuid leaf to find the cache details 372 * parameters cpuid leaf to find the cache details
354 */ 373 */
355 for (i = 0; i < num_cache_leaves; i++) { 374 for (i = 0; i < num_cache_leaves; i++) {
356 struct _cpuid4_info this_leaf; 375 struct _cpuid4_info_regs this_leaf;
357
358 int retval; 376 int retval;
359 377
360 retval = cpuid4_cache_lookup(i, &this_leaf); 378 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
361 if (retval >= 0) { 379 if (retval >= 0) {
362 switch(this_leaf.eax.split.level) { 380 switch(this_leaf.eax.split.level) {
363 case 1: 381 case 1:
@@ -506,17 +524,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
506 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 524 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
507 525
508 if (num_threads_sharing == 1) 526 if (num_threads_sharing == 1)
509 cpu_set(cpu, this_leaf->shared_cpu_map); 527 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
510 else { 528 else {
511 index_msb = get_count_order(num_threads_sharing); 529 index_msb = get_count_order(num_threads_sharing);
512 530
513 for_each_online_cpu(i) { 531 for_each_online_cpu(i) {
514 if (cpu_data(i).apicid >> index_msb == 532 if (cpu_data(i).apicid >> index_msb ==
515 c->apicid >> index_msb) { 533 c->apicid >> index_msb) {
516 cpu_set(i, this_leaf->shared_cpu_map); 534 cpumask_set_cpu(i,
535 to_cpumask(this_leaf->shared_cpu_map));
517 if (i != cpu && per_cpu(cpuid4_info, i)) { 536 if (i != cpu && per_cpu(cpuid4_info, i)) {
518 sibling_leaf = CPUID4_INFO_IDX(i, index); 537 sibling_leaf =
519 cpu_set(cpu, sibling_leaf->shared_cpu_map); 538 CPUID4_INFO_IDX(i, index);
539 cpumask_set_cpu(cpu, to_cpumask(
540 sibling_leaf->shared_cpu_map));
520 } 541 }
521 } 542 }
522 } 543 }
@@ -528,9 +549,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
528 int sibling; 549 int sibling;
529 550
530 this_leaf = CPUID4_INFO_IDX(cpu, index); 551 this_leaf = CPUID4_INFO_IDX(cpu, index);
531 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { 552 for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
532 sibling_leaf = CPUID4_INFO_IDX(sibling, index); 553 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
533 cpu_clear(cpu, sibling_leaf->shared_cpu_map); 554 cpumask_clear_cpu(cpu,
555 to_cpumask(sibling_leaf->shared_cpu_map));
534 } 556 }
535} 557}
536#else 558#else
@@ -635,8 +657,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
635 int n = 0; 657 int n = 0;
636 658
637 if (len > 1) { 659 if (len > 1) {
638 cpumask_t *mask = &this_leaf->shared_cpu_map; 660 const struct cpumask *mask;
639 661
662 mask = to_cpumask(this_leaf->shared_cpu_map);
640 n = type? 663 n = type?
641 cpulist_scnprintf(buf, len-2, mask) : 664 cpulist_scnprintf(buf, len-2, mask) :
642 cpumask_scnprintf(buf, len-2, mask); 665 cpumask_scnprintf(buf, len-2, mask);
@@ -699,7 +722,8 @@ static struct pci_dev *get_k8_northbridge(int node)
699 722
700static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) 723static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
701{ 724{
702 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 725 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
726 int node = cpu_to_node(cpumask_first(mask));
703 struct pci_dev *dev = NULL; 727 struct pci_dev *dev = NULL;
704 ssize_t ret = 0; 728 ssize_t ret = 0;
705 int i; 729 int i;
@@ -733,7 +757,8 @@ static ssize_t
733store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, 757store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
734 size_t count) 758 size_t count)
735{ 759{
736 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 760 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
761 int node = cpu_to_node(cpumask_first(mask));
737 struct pci_dev *dev = NULL; 762 struct pci_dev *dev = NULL;
738 unsigned int ret, index, val; 763 unsigned int ret, index, val;
739 764
@@ -878,7 +903,7 @@ err_out:
878 return -ENOMEM; 903 return -ENOMEM;
879} 904}
880 905
881static cpumask_t cache_dev_map = CPU_MASK_NONE; 906static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
882 907
883/* Add/Remove cache interface for CPU device */ 908/* Add/Remove cache interface for CPU device */
884static int __cpuinit cache_add_dev(struct sys_device * sys_dev) 909static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
@@ -918,7 +943,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
918 } 943 }
919 kobject_uevent(&(this_object->kobj), KOBJ_ADD); 944 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
920 } 945 }
921 cpu_set(cpu, cache_dev_map); 946 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
922 947
923 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); 948 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
924 return 0; 949 return 0;
@@ -931,9 +956,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
931 956
932 if (per_cpu(cpuid4_info, cpu) == NULL) 957 if (per_cpu(cpuid4_info, cpu) == NULL)
933 return; 958 return;
934 if (!cpu_isset(cpu, cache_dev_map)) 959 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
935 return; 960 return;
936 cpu_clear(cpu, cache_dev_map); 961 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
937 962
938 for (i = 0; i < num_cache_leaves; i++) 963 for (i = 0; i < num_cache_leaves; i++)
939 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); 964 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
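The cacheinfo changes replace the embedded cpumask_t fields with DECLARE_BITMAP(..., NR_CPUS) plus cpumask_set_cpu/cpumask_test_cpu/cpumask_clear_cpu accessors operating through to_cpumask(). The sketch below models the same idea in plain C with an unsigned long array and hand-rolled set/test/clear helpers; the names and the NR_CPUS value are illustrative, not the kernel's bitmap API.

#include <stdio.h>
#include <string.h>

#define NR_CPUS          64
#define BITS_PER_LONG    (8 * (int)sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* Userspace stand-in for DECLARE_BITMAP() plus the set/test/clear helpers. */
typedef unsigned long cpu_bitmap[BITS_TO_LONGS(NR_CPUS)];

static void bitmap_set_cpu(cpu_bitmap map, int cpu)
{
    map[cpu / BITS_PER_LONG] |= 1UL << (cpu % BITS_PER_LONG);
}

static void bitmap_clear_cpu(cpu_bitmap map, int cpu)
{
    map[cpu / BITS_PER_LONG] &= ~(1UL << (cpu % BITS_PER_LONG));
}

static int bitmap_test_cpu(const cpu_bitmap map, int cpu)
{
    return (map[cpu / BITS_PER_LONG] >> (cpu % BITS_PER_LONG)) & 1;
}

int main(void)
{
    cpu_bitmap shared_cpu_map;

    memset(shared_cpu_map, 0, sizeof(shared_cpu_map));
    bitmap_set_cpu(shared_cpu_map, 3);       /* like cpumask_set_cpu(3, ...)   */
    bitmap_set_cpu(shared_cpu_map, 40);
    bitmap_clear_cpu(shared_cpu_map, 3);     /* like cpumask_clear_cpu(3, ...) */

    printf("cpu 3 shared: %d, cpu 40 shared: %d\n",
           bitmap_test_cpu(shared_cpu_map, 3),
           bitmap_test_cpu(shared_cpu_map, 40));
    return 0;
}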
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index d7d2323bbb6..b2f89829bbe 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o
4obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o 4obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o
5obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o 5obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
6obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o 6obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
7obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index dfaebce3633..3552119b091 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -60,20 +60,6 @@ void mcheck_init(struct cpuinfo_x86 *c)
60 } 60 }
61} 61}
62 62
63static unsigned long old_cr4 __initdata;
64
65void __init stop_mce(void)
66{
67 old_cr4 = read_cr4();
68 clear_in_cr4(X86_CR4_MCE);
69}
70
71void __init restart_mce(void)
72{
73 if (old_cr4 & X86_CR4_MCE)
74 set_in_cr4(X86_CR4_MCE);
75}
76
77static int __init mcheck_disable(char *str) 63static int __init mcheck_disable(char *str)
78{ 64{
79 mce_disabled = 1; 65 mce_disabled = 1;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index fe79985ce0f..ca14604611e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -3,6 +3,8 @@
3 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. 3 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
4 * Rest from unknown author(s). 4 * Rest from unknown author(s).
5 * 2004 Andi Kleen. Rewrote most of it. 5 * 2004 Andi Kleen. Rewrote most of it.
6 * Copyright 2008 Intel Corporation
7 * Author: Andi Kleen
6 */ 8 */
7 9
8#include <linux/init.h> 10#include <linux/init.h>
@@ -24,6 +26,9 @@
24#include <linux/ctype.h> 26#include <linux/ctype.h>
25#include <linux/kmod.h> 27#include <linux/kmod.h>
26#include <linux/kdebug.h> 28#include <linux/kdebug.h>
29#include <linux/kobject.h>
30#include <linux/sysfs.h>
31#include <linux/ratelimit.h>
27#include <asm/processor.h> 32#include <asm/processor.h>
28#include <asm/msr.h> 33#include <asm/msr.h>
29#include <asm/mce.h> 34#include <asm/mce.h>
@@ -32,7 +37,6 @@
32#include <asm/idle.h> 37#include <asm/idle.h>
33 38
34#define MISC_MCELOG_MINOR 227 39#define MISC_MCELOG_MINOR 227
35#define NR_SYSFS_BANKS 6
36 40
37atomic_t mce_entry; 41atomic_t mce_entry;
38 42
@@ -47,7 +51,7 @@ static int mce_dont_init;
47 */ 51 */
48static int tolerant = 1; 52static int tolerant = 1;
49static int banks; 53static int banks;
50static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; 54static u64 *bank;
51static unsigned long notify_user; 55static unsigned long notify_user;
52static int rip_msr; 56static int rip_msr;
53static int mce_bootlog = -1; 57static int mce_bootlog = -1;
@@ -58,6 +62,19 @@ static char *trigger_argv[2] = { trigger, NULL };
58 62
59static DECLARE_WAIT_QUEUE_HEAD(mce_wait); 63static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
60 64
65/* MCA banks polled by the period polling timer for corrected events */
66DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
67 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
68};
69
70/* Do initial initialization of a struct mce */
71void mce_setup(struct mce *m)
72{
73 memset(m, 0, sizeof(struct mce));
74 m->cpu = smp_processor_id();
75 rdtscll(m->tsc);
76}
77
61/* 78/*
62 * Lockless MCE logging infrastructure. 79 * Lockless MCE logging infrastructure.
63 * This avoids deadlocks on printk locks without having to break locks. Also 80 * This avoids deadlocks on printk locks without having to break locks. Also
@@ -119,11 +136,11 @@ static void print_mce(struct mce *m)
119 print_symbol("{%s}", m->ip); 136 print_symbol("{%s}", m->ip);
120 printk("\n"); 137 printk("\n");
121 } 138 }
122 printk(KERN_EMERG "TSC %Lx ", m->tsc); 139 printk(KERN_EMERG "TSC %llx ", m->tsc);
123 if (m->addr) 140 if (m->addr)
124 printk("ADDR %Lx ", m->addr); 141 printk("ADDR %llx ", m->addr);
125 if (m->misc) 142 if (m->misc)
126 printk("MISC %Lx ", m->misc); 143 printk("MISC %llx ", m->misc);
127 printk("\n"); 144 printk("\n");
128 printk(KERN_EMERG "This is not a software problem!\n"); 145 printk(KERN_EMERG "This is not a software problem!\n");
129 printk(KERN_EMERG "Run through mcelog --ascii to decode " 146 printk(KERN_EMERG "Run through mcelog --ascii to decode "
@@ -149,8 +166,10 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start)
149 panic(msg); 166 panic(msg);
150} 167}
151 168
152static int mce_available(struct cpuinfo_x86 *c) 169int mce_available(struct cpuinfo_x86 *c)
153{ 170{
171 if (mce_dont_init)
172 return 0;
154 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); 173 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
155} 174}
156 175
@@ -172,7 +191,77 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
172} 191}
173 192
174/* 193/*
175 * The actual machine check handler 194 * Poll for corrected events or events that happened before reset.
195 * Those are just logged through /dev/mcelog.
196 *
197 * This is executed in standard interrupt context.
198 */
199void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
200{
201 struct mce m;
202 int i;
203
204 mce_setup(&m);
205
206 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
207 for (i = 0; i < banks; i++) {
208 if (!bank[i] || !test_bit(i, *b))
209 continue;
210
211 m.misc = 0;
212 m.addr = 0;
213 m.bank = i;
214 m.tsc = 0;
215
216 barrier();
217 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
218 if (!(m.status & MCI_STATUS_VAL))
219 continue;
220
221 /*
222 * Uncorrected events are handled by the exception handler
 223 * when it is enabled. But when the exception is disabled, log
224 * everything.
225 *
226 * TBD do the same check for MCI_STATUS_EN here?
227 */
228 if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
229 continue;
230
231 if (m.status & MCI_STATUS_MISCV)
232 rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
233 if (m.status & MCI_STATUS_ADDRV)
234 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
235
236 if (!(flags & MCP_TIMESTAMP))
237 m.tsc = 0;
238 /*
239 * Don't get the IP here because it's unlikely to
240 * have anything to do with the actual error location.
241 */
242
243 mce_log(&m);
244 add_taint(TAINT_MACHINE_CHECK);
245
246 /*
247 * Clear state for this bank.
248 */
249 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
250 }
251
252 /*
253 * Don't clear MCG_STATUS here because it's only defined for
254 * exceptions.
255 */
256}
257
258/*
259 * The actual machine check handler. This only handles real
260 * exceptions when something got corrupted coming in through int 18.
261 *
262 * This is executed in NMI context not subject to normal locking rules. This
263 * implies that most kernel services cannot be safely used. Don't even
264 * think about putting a printk in there!
176 */ 265 */
177void do_machine_check(struct pt_regs * regs, long error_code) 266void do_machine_check(struct pt_regs * regs, long error_code)
178{ 267{
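The new machine_check_poll() above walks every enabled bank, skips banks whose status word has no valid bit, skips uncorrected errors unless explicitly asked for them, logs the rest and clears the bank. A rough userspace model of that loop follows, with an array of fake status words and made-up bit positions standing in for the MCi_STATUS MSRs; it illustrates the control flow only.

#include <stdint.h>
#include <stdio.h>

#define STATUS_VAL (1ULL << 63)   /* "valid" bit, as in MCI_STATUS_VAL */
#define STATUS_UC  (1ULL << 61)   /* "uncorrected" bit, MCI_STATUS_UC  */

#define NBANKS 4

/* Simulated per-bank status registers standing in for MSR_IA32_MCx_STATUS. */
static uint64_t bank_status[NBANKS] = {
    0,                              /* nothing logged      */
    STATUS_VAL | 0x42,              /* corrected event     */
    STATUS_VAL | STATUS_UC | 0x99,  /* uncorrected event   */
    STATUS_VAL | 0x7,               /* corrected event     */
};

/* Poll corrected events only: skip empty banks and UC errors, then clear. */
static void poll_corrected(void)
{
    for (int i = 0; i < NBANKS; i++) {
        uint64_t s = bank_status[i];

        if (!(s & STATUS_VAL))
            continue;               /* bank has nothing to report        */
        if (s & STATUS_UC)
            continue;               /* leave fatal errors to the handler */

        printf("bank %d: logged corrected event %#llx\n",
               i, (unsigned long long)(s & 0xffff));
        bank_status[i] = 0;         /* clear state for this bank         */
    }
}

int main(void)
{
    poll_corrected();
    return 0;
}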
@@ -190,17 +279,18 @@ void do_machine_check(struct pt_regs * regs, long error_code)
190 * error. 279 * error.
191 */ 280 */
192 int kill_it = 0; 281 int kill_it = 0;
282 DECLARE_BITMAP(toclear, MAX_NR_BANKS);
193 283
194 atomic_inc(&mce_entry); 284 atomic_inc(&mce_entry);
195 285
196 if ((regs 286 if (notify_die(DIE_NMI, "machine check", regs, error_code,
197 && notify_die(DIE_NMI, "machine check", regs, error_code,
198 18, SIGKILL) == NOTIFY_STOP) 287 18, SIGKILL) == NOTIFY_STOP)
199 || !banks) 288 goto out2;
289 if (!banks)
200 goto out2; 290 goto out2;
201 291
202 memset(&m, 0, sizeof(struct mce)); 292 mce_setup(&m);
203 m.cpu = smp_processor_id(); 293
204 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); 294 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
205 /* if the restart IP is not valid, we're done for */ 295 /* if the restart IP is not valid, we're done for */
206 if (!(m.mcgstatus & MCG_STATUS_RIPV)) 296 if (!(m.mcgstatus & MCG_STATUS_RIPV))
@@ -210,18 +300,32 @@ void do_machine_check(struct pt_regs * regs, long error_code)
210 barrier(); 300 barrier();
211 301
212 for (i = 0; i < banks; i++) { 302 for (i = 0; i < banks; i++) {
213 if (i < NR_SYSFS_BANKS && !bank[i]) 303 __clear_bit(i, toclear);
304 if (!bank[i])
214 continue; 305 continue;
215 306
216 m.misc = 0; 307 m.misc = 0;
217 m.addr = 0; 308 m.addr = 0;
218 m.bank = i; 309 m.bank = i;
219 m.tsc = 0;
220 310
221 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); 311 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
222 if ((m.status & MCI_STATUS_VAL) == 0) 312 if ((m.status & MCI_STATUS_VAL) == 0)
223 continue; 313 continue;
224 314
315 /*
 316 * Corrected errors are handled by machine_check_poll().
 317 * Leave them alone.
318 */
319 if ((m.status & MCI_STATUS_UC) == 0)
320 continue;
321
322 /*
323 * Set taint even when machine check was not enabled.
324 */
325 add_taint(TAINT_MACHINE_CHECK);
326
327 __set_bit(i, toclear);
328
225 if (m.status & MCI_STATUS_EN) { 329 if (m.status & MCI_STATUS_EN) {
226 /* if PCC was set, there's no way out */ 330 /* if PCC was set, there's no way out */
227 no_way_out |= !!(m.status & MCI_STATUS_PCC); 331 no_way_out |= !!(m.status & MCI_STATUS_PCC);
@@ -235,6 +339,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
235 no_way_out = 1; 339 no_way_out = 1;
236 kill_it = 1; 340 kill_it = 1;
237 } 341 }
342 } else {
343 /*
344 * Machine check event was not enabled. Clear, but
345 * ignore.
346 */
347 continue;
238 } 348 }
239 349
240 if (m.status & MCI_STATUS_MISCV) 350 if (m.status & MCI_STATUS_MISCV)
@@ -243,10 +353,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
243 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); 353 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
244 354
245 mce_get_rip(&m, regs); 355 mce_get_rip(&m, regs);
246 if (error_code >= 0) 356 mce_log(&m);
247 rdtscll(m.tsc);
248 if (error_code != -2)
249 mce_log(&m);
250 357
251 /* Did this bank cause the exception? */ 358 /* Did this bank cause the exception? */
252 /* Assume that the bank with uncorrectable errors did it, 359 /* Assume that the bank with uncorrectable errors did it,
@@ -255,14 +362,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
255 panicm = m; 362 panicm = m;
256 panicm_found = 1; 363 panicm_found = 1;
257 } 364 }
258
259 add_taint(TAINT_MACHINE_CHECK);
260 } 365 }
261 366
262 /* Never do anything final in the polling timer */
263 if (!regs)
264 goto out;
265
266 /* If we didn't find an uncorrectable error, pick 367 /* If we didn't find an uncorrectable error, pick
267 the last one (shouldn't happen, just being safe). */ 368 the last one (shouldn't happen, just being safe). */
268 if (!panicm_found) 369 if (!panicm_found)
@@ -309,10 +410,11 @@ void do_machine_check(struct pt_regs * regs, long error_code)
309 /* notify userspace ASAP */ 410 /* notify userspace ASAP */
310 set_thread_flag(TIF_MCE_NOTIFY); 411 set_thread_flag(TIF_MCE_NOTIFY);
311 412
312 out:
313 /* the last thing we do is clear state */ 413 /* the last thing we do is clear state */
314 for (i = 0; i < banks; i++) 414 for (i = 0; i < banks; i++) {
315 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 415 if (test_bit(i, toclear))
416 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
417 }
316 wrmsrl(MSR_IA32_MCG_STATUS, 0); 418 wrmsrl(MSR_IA32_MCG_STATUS, 0);
317 out2: 419 out2:
318 atomic_dec(&mce_entry); 420 atomic_dec(&mce_entry);
@@ -332,15 +434,13 @@ void do_machine_check(struct pt_regs * regs, long error_code)
332 * and historically has been the register value of the 434 * and historically has been the register value of the
333 * MSR_IA32_THERMAL_STATUS (Intel) msr. 435 * MSR_IA32_THERMAL_STATUS (Intel) msr.
334 */ 436 */
335void mce_log_therm_throt_event(unsigned int cpu, __u64 status) 437void mce_log_therm_throt_event(__u64 status)
336{ 438{
337 struct mce m; 439 struct mce m;
338 440
339 memset(&m, 0, sizeof(m)); 441 mce_setup(&m);
340 m.cpu = cpu;
341 m.bank = MCE_THERMAL_BANK; 442 m.bank = MCE_THERMAL_BANK;
342 m.status = status; 443 m.status = status;
343 rdtscll(m.tsc);
344 mce_log(&m); 444 mce_log(&m);
345} 445}
346#endif /* CONFIG_X86_MCE_INTEL */ 446#endif /* CONFIG_X86_MCE_INTEL */
@@ -353,18 +453,18 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
353 453
354static int check_interval = 5 * 60; /* 5 minutes */ 454static int check_interval = 5 * 60; /* 5 minutes */
355static int next_interval; /* in jiffies */ 455static int next_interval; /* in jiffies */
356static void mcheck_timer(struct work_struct *work); 456static void mcheck_timer(unsigned long);
357static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); 457static DEFINE_PER_CPU(struct timer_list, mce_timer);
358 458
359static void mcheck_check_cpu(void *info) 459static void mcheck_timer(unsigned long data)
360{ 460{
361 if (mce_available(&current_cpu_data)) 461 struct timer_list *t = &per_cpu(mce_timer, data);
362 do_machine_check(NULL, 0);
363}
364 462
365static void mcheck_timer(struct work_struct *work) 463 WARN_ON(smp_processor_id() != data);
366{ 464
367 on_each_cpu(mcheck_check_cpu, NULL, 1); 465 if (mce_available(&current_cpu_data))
466 machine_check_poll(MCP_TIMESTAMP,
467 &__get_cpu_var(mce_poll_banks));
368 468
369 /* 469 /*
370 * Alert userspace if needed. If we logged an MCE, reduce the 470 * Alert userspace if needed. If we logged an MCE, reduce the
@@ -377,31 +477,41 @@ static void mcheck_timer(struct work_struct *work)
377 (int)round_jiffies_relative(check_interval*HZ)); 477 (int)round_jiffies_relative(check_interval*HZ));
378 } 478 }
379 479
380 schedule_delayed_work(&mcheck_work, next_interval); 480 t->expires = jiffies + next_interval;
481 add_timer(t);
482}
483
484static void mce_do_trigger(struct work_struct *work)
485{
486 call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
381} 487}
382 488
489static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
490
383/* 491/*
384 * This is only called from process context. This is where we do 492 * Notify the user(s) about new machine check events.
385 * anything we need to alert userspace about new MCEs. This is called 493 * Can be called from interrupt context, but not from machine check/NMI
386 * directly from the poller and also from entry.S and idle, thanks to 494 * context.
387 * TIF_MCE_NOTIFY.
388 */ 495 */
389int mce_notify_user(void) 496int mce_notify_user(void)
390{ 497{
498 /* Not more than two messages every minute */
499 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
500
391 clear_thread_flag(TIF_MCE_NOTIFY); 501 clear_thread_flag(TIF_MCE_NOTIFY);
392 if (test_and_clear_bit(0, &notify_user)) { 502 if (test_and_clear_bit(0, &notify_user)) {
393 static unsigned long last_print;
394 unsigned long now = jiffies;
395
396 wake_up_interruptible(&mce_wait); 503 wake_up_interruptible(&mce_wait);
397 if (trigger[0])
398 call_usermodehelper(trigger, trigger_argv, NULL,
399 UMH_NO_WAIT);
400 504
401 if (time_after_eq(now, last_print + (check_interval*HZ))) { 505 /*
402 last_print = now; 506 * There is no risk of missing notifications because
 507 * the work's pending flag is always cleared before the function is
 508 * executed.
509 */
510 if (trigger[0] && !work_pending(&mce_trigger_work))
511 schedule_work(&mce_trigger_work);
512
513 if (__ratelimit(&ratelimit))
403 printk(KERN_INFO "Machine check events logged\n"); 514 printk(KERN_INFO "Machine check events logged\n");
404 }
405 515
406 return 1; 516 return 1;
407 } 517 }
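The reworked mce_notify_user() above replaces the open-coded once-per-check_interval printk throttle with a ratelimit state that allows at most two messages per minute. Here is a self-contained sketch of such a fixed-window rate limiter; the struct and field names are invented for illustration and are not the kernel's ratelimit API.

#include <stdio.h>
#include <time.h>

/* Allow at most `burst` messages per `interval` seconds, like
 * DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2) in the hunk above. */
struct ratelimit {
    time_t interval;
    int    burst;
    time_t window_start;
    int    used;
};

static int ratelimit_ok(struct ratelimit *rl)
{
    time_t now = time(NULL);

    if (now - rl->window_start >= rl->interval) {
        rl->window_start = now;     /* start a new window     */
        rl->used = 0;
    }
    if (rl->used >= rl->burst)
        return 0;                   /* suppress this message  */
    rl->used++;
    return 1;
}

int main(void)
{
    struct ratelimit rl = { .interval = 60, .burst = 2 };

    for (int i = 0; i < 5; i++) {
        if (ratelimit_ok(&rl))
            printf("Machine check events logged\n");
        else
            printf("(message %d rate limited)\n", i);
    }
    return 0;
}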
@@ -425,63 +535,78 @@ static struct notifier_block mce_idle_notifier = {
425 535
426static __init int periodic_mcheck_init(void) 536static __init int periodic_mcheck_init(void)
427{ 537{
428 next_interval = check_interval * HZ; 538 idle_notifier_register(&mce_idle_notifier);
429 if (next_interval) 539 return 0;
430 schedule_delayed_work(&mcheck_work,
431 round_jiffies_relative(next_interval));
432 idle_notifier_register(&mce_idle_notifier);
433 return 0;
434} 540}
435__initcall(periodic_mcheck_init); 541__initcall(periodic_mcheck_init);
436 542
437
438/* 543/*
439 * Initialize Machine Checks for a CPU. 544 * Initialize Machine Checks for a CPU.
440 */ 545 */
441static void mce_init(void *dummy) 546static int mce_cap_init(void)
442{ 547{
443 u64 cap; 548 u64 cap;
444 int i; 549 unsigned b;
445 550
446 rdmsrl(MSR_IA32_MCG_CAP, cap); 551 rdmsrl(MSR_IA32_MCG_CAP, cap);
447 banks = cap & 0xff; 552 b = cap & 0xff;
448 if (banks > MCE_EXTENDED_BANK) { 553 if (b > MAX_NR_BANKS) {
449 banks = MCE_EXTENDED_BANK; 554 printk(KERN_WARNING
450 printk(KERN_INFO "MCE: warning: using only %d banks\n", 555 "MCE: Using only %u machine check banks out of %u\n",
451 MCE_EXTENDED_BANK); 556 MAX_NR_BANKS, b);
557 b = MAX_NR_BANKS;
452 } 558 }
559
560 /* Don't support asymmetric configurations today */
561 WARN_ON(banks != 0 && b != banks);
562 banks = b;
563 if (!bank) {
564 bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
565 if (!bank)
566 return -ENOMEM;
567 memset(bank, 0xff, banks * sizeof(u64));
568 }
569
453 /* Use accurate RIP reporting if available. */ 570 /* Use accurate RIP reporting if available. */
454 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) 571 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
455 rip_msr = MSR_IA32_MCG_EIP; 572 rip_msr = MSR_IA32_MCG_EIP;
456 573
457 /* Log the machine checks left over from the previous reset. 574 return 0;
458 This also clears all registers */ 575}
459 do_machine_check(NULL, mce_bootlog ? -1 : -2); 576
577static void mce_init(void *dummy)
578{
579 u64 cap;
580 int i;
581 mce_banks_t all_banks;
582
583 /*
584 * Log the machine checks left over from the previous reset.
585 */
586 bitmap_fill(all_banks, MAX_NR_BANKS);
587 machine_check_poll(MCP_UC, &all_banks);
460 588
461 set_in_cr4(X86_CR4_MCE); 589 set_in_cr4(X86_CR4_MCE);
462 590
591 rdmsrl(MSR_IA32_MCG_CAP, cap);
463 if (cap & MCG_CTL_P) 592 if (cap & MCG_CTL_P)
464 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 593 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
465 594
466 for (i = 0; i < banks; i++) { 595 for (i = 0; i < banks; i++) {
467 if (i < NR_SYSFS_BANKS) 596 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
468 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
469 else
470 wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
471
472 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 597 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
473 } 598 }
474} 599}
475 600
476/* Add per CPU specific workarounds here */ 601/* Add per CPU specific workarounds here */
477static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) 602static void mce_cpu_quirks(struct cpuinfo_x86 *c)
478{ 603{
479 /* This should be disabled by the BIOS, but isn't always */ 604 /* This should be disabled by the BIOS, but isn't always */
480 if (c->x86_vendor == X86_VENDOR_AMD) { 605 if (c->x86_vendor == X86_VENDOR_AMD) {
481 if(c->x86 == 15) 606 if (c->x86 == 15 && banks > 4)
482 /* disable GART TBL walk error reporting, which trips off 607 /* disable GART TBL walk error reporting, which trips off
483 incorrectly with the IOMMU & 3ware & Cerberus. */ 608 incorrectly with the IOMMU & 3ware & Cerberus. */
484 clear_bit(10, &bank[4]); 609 clear_bit(10, (unsigned long *)&bank[4]);
485 if(c->x86 <= 17 && mce_bootlog < 0) 610 if(c->x86 <= 17 && mce_bootlog < 0)
486 /* Lots of broken BIOS around that don't clear them 611 /* Lots of broken BIOS around that don't clear them
487 by default and leave crap in there. Don't log. */ 612 by default and leave crap in there. Don't log. */
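mce_cap_init() above reads the bank count from MCG_CAP, clamps it to MAX_NR_BANKS, and allocates the per-bank control array with every bit set so all error types are enabled by default. A userspace approximation of that sequence, assuming a fake capability word instead of the real MSR read; values and names are illustrative.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_NR_BANKS 128

static int      banks;
static uint64_t *bank;   /* per-bank control words, default "enable everything" */

/* Model of mce_cap_init(): take the bank count from a capability word,
 * clamp it, and allocate the control array on first use. */
static int cap_init(uint64_t cap)
{
    unsigned b = cap & 0xff;

    if (b > MAX_NR_BANKS) {
        fprintf(stderr, "using only %d of %u banks\n", MAX_NR_BANKS, b);
        b = MAX_NR_BANKS;
    }
    banks = b;

    if (!bank) {
        bank = malloc(banks * sizeof(*bank));
        if (!bank)
            return -1;
        memset(bank, 0xff, banks * sizeof(*bank));  /* all error types enabled */
    }
    return 0;
}

int main(void)
{
    if (cap_init(0x0106 /* pretend MCG_CAP reports 6 banks */) < 0)
        return 1;

    printf("banks=%d, bank[0]=%#llx\n", banks, (unsigned long long)bank[0]);
    free(bank);
    return 0;
}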
@@ -504,20 +629,38 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
504 } 629 }
505} 630}
506 631
632static void mce_init_timer(void)
633{
634 struct timer_list *t = &__get_cpu_var(mce_timer);
635
636 /* data race harmless because everyone sets to the same value */
637 if (!next_interval)
638 next_interval = check_interval * HZ;
639 if (!next_interval)
640 return;
641 setup_timer(t, mcheck_timer, smp_processor_id());
642 t->expires = round_jiffies(jiffies + next_interval);
643 add_timer(t);
644}
645
507/* 646/*
508 * Called for each booted CPU to set up machine checks. 647 * Called for each booted CPU to set up machine checks.
509 * Must be called with preempt off. 648 * Must be called with preempt off.
510 */ 649 */
511void __cpuinit mcheck_init(struct cpuinfo_x86 *c) 650void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
512{ 651{
513 mce_cpu_quirks(c); 652 if (!mce_available(c))
653 return;
514 654
515 if (mce_dont_init || 655 if (mce_cap_init() < 0) {
516 !mce_available(c)) 656 mce_dont_init = 1;
517 return; 657 return;
658 }
659 mce_cpu_quirks(c);
518 660
519 mce_init(NULL); 661 mce_init(NULL);
520 mce_cpu_features(c); 662 mce_cpu_features(c);
663 mce_init_timer();
521} 664}
522 665
523/* 666/*
@@ -573,7 +716,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
573{ 716{
574 unsigned long *cpu_tsc; 717 unsigned long *cpu_tsc;
575 static DEFINE_MUTEX(mce_read_mutex); 718 static DEFINE_MUTEX(mce_read_mutex);
576 unsigned next; 719 unsigned prev, next;
577 char __user *buf = ubuf; 720 char __user *buf = ubuf;
578 int i, err; 721 int i, err;
579 722
@@ -592,25 +735,32 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
592 } 735 }
593 736
594 err = 0; 737 err = 0;
595 for (i = 0; i < next; i++) { 738 prev = 0;
596 unsigned long start = jiffies; 739 do {
597 740 for (i = prev; i < next; i++) {
598 while (!mcelog.entry[i].finished) { 741 unsigned long start = jiffies;
599 if (time_after_eq(jiffies, start + 2)) { 742
600 memset(mcelog.entry + i,0, sizeof(struct mce)); 743 while (!mcelog.entry[i].finished) {
601 goto timeout; 744 if (time_after_eq(jiffies, start + 2)) {
745 memset(mcelog.entry + i, 0,
746 sizeof(struct mce));
747 goto timeout;
748 }
749 cpu_relax();
602 } 750 }
603 cpu_relax(); 751 smp_rmb();
752 err |= copy_to_user(buf, mcelog.entry + i,
753 sizeof(struct mce));
754 buf += sizeof(struct mce);
755timeout:
756 ;
604 } 757 }
605 smp_rmb();
606 err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
607 buf += sizeof(struct mce);
608 timeout:
609 ;
610 }
611 758
612 memset(mcelog.entry, 0, next * sizeof(struct mce)); 759 memset(mcelog.entry + prev, 0,
613 mcelog.next = 0; 760 (next - prev) * sizeof(struct mce));
761 prev = next;
762 next = cmpxchg(&mcelog.next, prev, 0);
763 } while (next != prev);
614 764
615 synchronize_sched(); 765 synchronize_sched();
616 766
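The new mce_read() loop drains the log in rounds: copy everything up to `next`, try to cmpxchg the index back to zero, and go around again if producers appended entries in the meantime. The sketch below models that consumer with GCC's __sync_val_compare_and_swap builtin and a single simulated producer racing in during the first pass; it runs single-threaded, so it only illustrates the control flow, not the memory-ordering details.

#include <stdio.h>

#define LOG_LEN 16

static int log_entries[LOG_LEN];
static int log_next;                /* index of the first free slot */

/* Producer side: claim a slot and fill it (single-threaded model here). */
static void log_event(int v)
{
    if (log_next < LOG_LEN)
        log_entries[log_next++] = v;
}

/* Consumer side, mirroring the reworked mce_read() loop: drain everything
 * seen so far, reset the index with a compare-and-swap, and go around again
 * if new entries were appended while we were copying. */
static void drain(void)
{
    int prev = 0;
    int next = log_next;
    int round = 0;

    do {
        for (int i = prev; i < next; i++)
            printf("copied entry %d = %d\n", i, log_entries[i]);

        /* Simulate an event racing in during the first pass. */
        if (round++ == 0)
            log_event(99);

        prev = next;
        next = __sync_val_compare_and_swap(&log_next, prev, 0);
    } while (next != prev);
}

int main(void)
{
    log_event(1);
    log_event(2);
    drain();
    return 0;
}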
@@ -680,20 +830,6 @@ static struct miscdevice mce_log_device = {
680 &mce_chrdev_ops, 830 &mce_chrdev_ops,
681}; 831};
682 832
683static unsigned long old_cr4 __initdata;
684
685void __init stop_mce(void)
686{
687 old_cr4 = read_cr4();
688 clear_in_cr4(X86_CR4_MCE);
689}
690
691void __init restart_mce(void)
692{
693 if (old_cr4 & X86_CR4_MCE)
694 set_in_cr4(X86_CR4_MCE);
695}
696
697/* 833/*
698 * Old style boot options parsing. Only for compatibility. 834 * Old style boot options parsing. Only for compatibility.
699 */ 835 */
@@ -703,8 +839,7 @@ static int __init mcheck_disable(char *str)
703 return 1; 839 return 1;
704} 840}
705 841
706/* mce=off disables machine check. Note you can re-enable it later 842/* mce=off disables machine check.
707 using sysfs.
708 mce=TOLERANCELEVEL (number, see above) 843 mce=TOLERANCELEVEL (number, see above)
709 mce=bootlog Log MCEs from before booting. Disabled by default on AMD. 844 mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
710 mce=nobootlog Don't log MCEs from before booting. */ 845 mce=nobootlog Don't log MCEs from before booting. */
@@ -728,6 +863,29 @@ __setup("mce=", mcheck_enable);
728 * Sysfs support 863 * Sysfs support
729 */ 864 */
730 865
866/*
867 * Disable machine checks on suspend and shutdown. We can't really handle
868 * them later.
869 */
870static int mce_disable(void)
871{
872 int i;
873
874 for (i = 0; i < banks; i++)
875 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
876 return 0;
877}
878
879static int mce_suspend(struct sys_device *dev, pm_message_t state)
880{
881 return mce_disable();
882}
883
884static int mce_shutdown(struct sys_device *dev)
885{
886 return mce_disable();
887}
888
731/* On resume clear all MCE state. Don't want to see leftovers from the BIOS. 889/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
732 Only one CPU is active at this time, the others get readded later using 890 Only one CPU is active at this time, the others get readded later using
733 CPU hotplug. */ 891 CPU hotplug. */
@@ -738,20 +896,24 @@ static int mce_resume(struct sys_device *dev)
738 return 0; 896 return 0;
739} 897}
740 898
899static void mce_cpu_restart(void *data)
900{
901 del_timer_sync(&__get_cpu_var(mce_timer));
902 if (mce_available(&current_cpu_data))
903 mce_init(NULL);
904 mce_init_timer();
905}
906
741/* Reinit MCEs after user configuration changes */ 907/* Reinit MCEs after user configuration changes */
742static void mce_restart(void) 908static void mce_restart(void)
743{ 909{
744 if (next_interval)
745 cancel_delayed_work(&mcheck_work);
746 /* Timer race is harmless here */
747 on_each_cpu(mce_init, NULL, 1);
748 next_interval = check_interval * HZ; 910 next_interval = check_interval * HZ;
749 if (next_interval) 911 on_each_cpu(mce_cpu_restart, NULL, 1);
750 schedule_delayed_work(&mcheck_work,
751 round_jiffies_relative(next_interval));
752} 912}
753 913
754static struct sysdev_class mce_sysclass = { 914static struct sysdev_class mce_sysclass = {
915 .suspend = mce_suspend,
916 .shutdown = mce_shutdown,
755 .resume = mce_resume, 917 .resume = mce_resume,
756 .name = "machinecheck", 918 .name = "machinecheck",
757}; 919};
@@ -778,16 +940,26 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinit
778 } \ 940 } \
779 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); 941 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
780 942
781/* 943static struct sysdev_attribute *bank_attrs;
782 * TBD should generate these dynamically based on number of available banks. 944
783 * Have only 6 contol banks in /sysfs until then. 945static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
784 */ 946 char *buf)
785ACCESSOR(bank0ctl,bank[0],mce_restart()) 947{
786ACCESSOR(bank1ctl,bank[1],mce_restart()) 948 u64 b = bank[attr - bank_attrs];
787ACCESSOR(bank2ctl,bank[2],mce_restart()) 949 return sprintf(buf, "%llx\n", b);
788ACCESSOR(bank3ctl,bank[3],mce_restart()) 950}
789ACCESSOR(bank4ctl,bank[4],mce_restart()) 951
790ACCESSOR(bank5ctl,bank[5],mce_restart()) 952static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
953 const char *buf, size_t siz)
954{
955 char *end;
956 u64 new = simple_strtoull(buf, &end, 0);
957 if (end == buf)
958 return -EINVAL;
959 bank[attr - bank_attrs] = new;
960 mce_restart();
961 return end-buf;
962}
791 963
792static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, 964static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
793 char *buf) 965 char *buf)
@@ -814,8 +986,6 @@ static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
814static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); 986static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
815ACCESSOR(check_interval,check_interval,mce_restart()) 987ACCESSOR(check_interval,check_interval,mce_restart())
816static struct sysdev_attribute *mce_attributes[] = { 988static struct sysdev_attribute *mce_attributes[] = {
817 &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
818 &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
819 &attr_tolerant.attr, &attr_check_interval, &attr_trigger, 989 &attr_tolerant.attr, &attr_check_interval, &attr_trigger,
820 NULL 990 NULL
821}; 991};
@@ -845,11 +1015,22 @@ static __cpuinit int mce_create_device(unsigned int cpu)
845 if (err) 1015 if (err)
846 goto error; 1016 goto error;
847 } 1017 }
1018 for (i = 0; i < banks; i++) {
1019 err = sysdev_create_file(&per_cpu(device_mce, cpu),
1020 &bank_attrs[i]);
1021 if (err)
1022 goto error2;
1023 }
848 cpu_set(cpu, mce_device_initialized); 1024 cpu_set(cpu, mce_device_initialized);
849 1025
850 return 0; 1026 return 0;
1027error2:
1028 while (--i >= 0) {
1029 sysdev_remove_file(&per_cpu(device_mce, cpu),
1030 &bank_attrs[i]);
1031 }
851error: 1032error:
852 while (i--) { 1033 while (--i >= 0) {
853 sysdev_remove_file(&per_cpu(device_mce,cpu), 1034 sysdev_remove_file(&per_cpu(device_mce,cpu),
854 mce_attributes[i]); 1035 mce_attributes[i]);
855 } 1036 }
@@ -868,15 +1049,46 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
868 for (i = 0; mce_attributes[i]; i++) 1049 for (i = 0; mce_attributes[i]; i++)
869 sysdev_remove_file(&per_cpu(device_mce,cpu), 1050 sysdev_remove_file(&per_cpu(device_mce,cpu),
870 mce_attributes[i]); 1051 mce_attributes[i]);
1052 for (i = 0; i < banks; i++)
1053 sysdev_remove_file(&per_cpu(device_mce, cpu),
1054 &bank_attrs[i]);
871 sysdev_unregister(&per_cpu(device_mce,cpu)); 1055 sysdev_unregister(&per_cpu(device_mce,cpu));
872 cpu_clear(cpu, mce_device_initialized); 1056 cpu_clear(cpu, mce_device_initialized);
873} 1057}
874 1058
1059/* Make sure there are no machine checks on offlined CPUs. */
1060static void mce_disable_cpu(void *h)
1061{
1062 int i;
1063 unsigned long action = *(unsigned long *)h;
1064
1065 if (!mce_available(&current_cpu_data))
1066 return;
1067 if (!(action & CPU_TASKS_FROZEN))
1068 cmci_clear();
1069 for (i = 0; i < banks; i++)
1070 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
1071}
1072
1073static void mce_reenable_cpu(void *h)
1074{
1075 int i;
1076 unsigned long action = *(unsigned long *)h;
1077
1078 if (!mce_available(&current_cpu_data))
1079 return;
1080 if (!(action & CPU_TASKS_FROZEN))
1081 cmci_reenable();
1082 for (i = 0; i < banks; i++)
1083 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
1084}
1085
875/* Get notified when a cpu comes on/off. Be hotplug friendly. */ 1086/* Get notified when a cpu comes on/off. Be hotplug friendly. */
876static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, 1087static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
877 unsigned long action, void *hcpu) 1088 unsigned long action, void *hcpu)
878{ 1089{
879 unsigned int cpu = (unsigned long)hcpu; 1090 unsigned int cpu = (unsigned long)hcpu;
1091 struct timer_list *t = &per_cpu(mce_timer, cpu);
880 1092
881 switch (action) { 1093 switch (action) {
882 case CPU_ONLINE: 1094 case CPU_ONLINE:
@@ -891,6 +1103,21 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
891 threshold_cpu_callback(action, cpu); 1103 threshold_cpu_callback(action, cpu);
892 mce_remove_device(cpu); 1104 mce_remove_device(cpu);
893 break; 1105 break;
1106 case CPU_DOWN_PREPARE:
1107 case CPU_DOWN_PREPARE_FROZEN:
1108 del_timer_sync(t);
1109 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
1110 break;
1111 case CPU_DOWN_FAILED:
1112 case CPU_DOWN_FAILED_FROZEN:
1113 t->expires = round_jiffies(jiffies + next_interval);
1114 add_timer_on(t, cpu);
1115 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
1116 break;
1117 case CPU_POST_DEAD:
1118 /* intentionally ignoring frozen here */
1119 cmci_rediscover(cpu);
1120 break;
894 } 1121 }
895 return NOTIFY_OK; 1122 return NOTIFY_OK;
896} 1123}
@@ -899,6 +1126,34 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
899 .notifier_call = mce_cpu_callback, 1126 .notifier_call = mce_cpu_callback,
900}; 1127};
901 1128
1129static __init int mce_init_banks(void)
1130{
1131 int i;
1132
1133 bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
1134 GFP_KERNEL);
1135 if (!bank_attrs)
1136 return -ENOMEM;
1137
1138 for (i = 0; i < banks; i++) {
1139 struct sysdev_attribute *a = &bank_attrs[i];
1140 a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
1141 if (!a->attr.name)
1142 goto nomem;
1143 a->attr.mode = 0644;
1144 a->show = show_bank;
1145 a->store = set_bank;
1146 }
1147 return 0;
1148
1149nomem:
1150 while (--i >= 0)
1151 kfree(bank_attrs[i].attr.name);
1152 kfree(bank_attrs);
1153 bank_attrs = NULL;
1154 return -ENOMEM;
1155}
1156
902static __init int mce_init_device(void) 1157static __init int mce_init_device(void)
903{ 1158{
904 int err; 1159 int err;
@@ -906,6 +1161,11 @@ static __init int mce_init_device(void)
906 1161
907 if (!mce_available(&boot_cpu_data)) 1162 if (!mce_available(&boot_cpu_data))
908 return -EIO; 1163 return -EIO;
1164
1165 err = mce_init_banks();
1166 if (err)
1167 return err;
1168
909 err = sysdev_class_register(&mce_sysclass); 1169 err = sysdev_class_register(&mce_sysclass);
910 if (err) 1170 if (err)
911 return err; 1171 return err;
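mce_init_banks() above builds one sysfs attribute per bank at runtime, giving each a kasprintf-generated name and unwinding any partial allocations if a step fails. A plain-C sketch of that allocate-or-unwind pattern; struct bank_attr and the helper names are invented for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Userspace stand-in for the dynamically created per-bank attributes:
 * one named object per bank, built in a loop and unwound on failure. */
struct bank_attr {
    char *name;
};

static struct bank_attr *bank_attrs;

static int init_bank_attrs(int banks)
{
    bank_attrs = calloc(banks, sizeof(*bank_attrs));
    if (!bank_attrs)
        return -1;

    for (int i = 0; i < banks; i++) {
        char buf[16];

        snprintf(buf, sizeof(buf), "bank%d", i);   /* like kasprintf("bank%d", i) */
        bank_attrs[i].name = malloc(strlen(buf) + 1);
        if (!bank_attrs[i].name)
            goto nomem;
        strcpy(bank_attrs[i].name, buf);
    }
    return 0;

nomem:
    /* Undo the names allocated so far, then the array itself. */
    for (int i = 0; i < banks && bank_attrs[i].name; i++)
        free(bank_attrs[i].name);
    free(bank_attrs);
    bank_attrs = NULL;
    return -1;
}

int main(void)
{
    if (init_bank_attrs(6) == 0)
        printf("created %s .. %s\n", bank_attrs[0].name, bank_attrs[5].name);
    return 0;
}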
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index f2ee0ae29bd..c5a32f92d07 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = {
67struct threshold_bank { 67struct threshold_bank {
68 struct kobject *kobj; 68 struct kobject *kobj;
69 struct threshold_block *blocks; 69 struct threshold_block *blocks;
70 cpumask_t cpus; 70 cpumask_var_t cpus;
71}; 71};
72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); 72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
73 73
@@ -79,6 +79,8 @@ static unsigned char shared_bank[NR_BANKS] = {
79 79
80static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ 80static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
81 81
82static void amd_threshold_interrupt(void);
83
82/* 84/*
83 * CPU Initialization 85 * CPU Initialization
84 */ 86 */
@@ -174,6 +176,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
174 tr.reset = 0; 176 tr.reset = 0;
175 tr.old_limit = 0; 177 tr.old_limit = 0;
176 threshold_restart_bank(&tr); 178 threshold_restart_bank(&tr);
179
180 mce_threshold_vector = amd_threshold_interrupt;
177 } 181 }
178 } 182 }
179} 183}
@@ -187,19 +191,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
187 * the interrupt goes off when error_count reaches threshold_limit. 191 * the interrupt goes off when error_count reaches threshold_limit.
188 * the handler will simply log mcelog w/ software defined bank number. 192 * the handler will simply log mcelog w/ software defined bank number.
189 */ 193 */
190asmlinkage void mce_threshold_interrupt(void) 194static void amd_threshold_interrupt(void)
191{ 195{
192 unsigned int bank, block; 196 unsigned int bank, block;
193 struct mce m; 197 struct mce m;
194 u32 low = 0, high = 0, address = 0; 198 u32 low = 0, high = 0, address = 0;
195 199
196 ack_APIC_irq(); 200 mce_setup(&m);
197 exit_idle();
198 irq_enter();
199
200 memset(&m, 0, sizeof(m));
201 rdtscll(m.tsc);
202 m.cpu = smp_processor_id();
203 201
204 /* assume first bank caused it */ 202 /* assume first bank caused it */
205 for (bank = 0; bank < NR_BANKS; ++bank) { 203 for (bank = 0; bank < NR_BANKS; ++bank) {
@@ -233,7 +231,8 @@ asmlinkage void mce_threshold_interrupt(void)
233 231
234 /* Log the machine check that caused the threshold 232 /* Log the machine check that caused the threshold
235 event. */ 233 event. */
236 do_machine_check(NULL, 0); 234 machine_check_poll(MCP_TIMESTAMP,
235 &__get_cpu_var(mce_poll_banks));
237 236
238 if (high & MASK_OVERFLOW_HI) { 237 if (high & MASK_OVERFLOW_HI) {
239 rdmsrl(address, m.misc); 238 rdmsrl(address, m.misc);
@@ -243,13 +242,10 @@ asmlinkage void mce_threshold_interrupt(void)
243 + bank * NR_BLOCKS 242 + bank * NR_BLOCKS
244 + block; 243 + block;
245 mce_log(&m); 244 mce_log(&m);
246 goto out; 245 return;
247 } 246 }
248 } 247 }
249 } 248 }
250out:
251 inc_irq_stat(irq_threshold_count);
252 irq_exit();
253} 249}
254 250
255/* 251/*
@@ -481,7 +477,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
481 477
482#ifdef CONFIG_SMP 478#ifdef CONFIG_SMP
483 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 479 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
484 i = first_cpu(per_cpu(cpu_core_map, cpu)); 480 i = cpumask_first(&per_cpu(cpu_core_map, cpu));
485 481
486 /* first core not up yet */ 482 /* first core not up yet */
487 if (cpu_data(i).cpu_core_id) 483 if (cpu_data(i).cpu_core_id)
@@ -501,7 +497,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
501 if (err) 497 if (err)
502 goto out; 498 goto out;
503 499
504 b->cpus = per_cpu(cpu_core_map, cpu); 500 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
505 per_cpu(threshold_banks, cpu)[bank] = b; 501 per_cpu(threshold_banks, cpu)[bank] = b;
506 goto out; 502 goto out;
507 } 503 }
@@ -512,15 +508,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
512 err = -ENOMEM; 508 err = -ENOMEM;
513 goto out; 509 goto out;
514 } 510 }
511 if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
512 kfree(b);
513 err = -ENOMEM;
514 goto out;
515 }
515 516
516 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); 517 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
517 if (!b->kobj) 518 if (!b->kobj)
518 goto out_free; 519 goto out_free;
519 520
520#ifndef CONFIG_SMP 521#ifndef CONFIG_SMP
521 b->cpus = CPU_MASK_ALL; 522 cpumask_setall(b->cpus);
522#else 523#else
523 b->cpus = per_cpu(cpu_core_map, cpu); 524 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
524#endif 525#endif
525 526
526 per_cpu(threshold_banks, cpu)[bank] = b; 527 per_cpu(threshold_banks, cpu)[bank] = b;
@@ -529,7 +530,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
529 if (err) 530 if (err)
530 goto out_free; 531 goto out_free;
531 532
532 for_each_cpu_mask_nr(i, b->cpus) { 533 for_each_cpu(i, b->cpus) {
533 if (i == cpu) 534 if (i == cpu)
534 continue; 535 continue;
535 536
@@ -545,6 +546,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
545 546
546out_free: 547out_free:
547 per_cpu(threshold_banks, cpu)[bank] = NULL; 548 per_cpu(threshold_banks, cpu)[bank] = NULL;
549 free_cpumask_var(b->cpus);
548 kfree(b); 550 kfree(b);
549out: 551out:
550 return err; 552 return err;
@@ -619,7 +621,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
619#endif 621#endif
620 622
621 /* remove all sibling symlinks before unregistering */ 623 /* remove all sibling symlinks before unregistering */
622 for_each_cpu_mask_nr(i, b->cpus) { 624 for_each_cpu(i, b->cpus) {
623 if (i == cpu) 625 if (i == cpu)
624 continue; 626 continue;
625 627
@@ -632,6 +634,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
632free_out: 634free_out:
633 kobject_del(b->kobj); 635 kobject_del(b->kobj);
634 kobject_put(b->kobj); 636 kobject_put(b->kobj);
637 free_cpumask_var(b->cpus);
635 kfree(b); 638 kfree(b);
636 per_cpu(threshold_banks, cpu)[bank] = NULL; 639 per_cpu(threshold_banks, cpu)[bank] = NULL;
637} 640}
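The AMD threshold changes convert struct threshold_bank's embedded cpumask_t into a cpumask_var_t that is allocated with alloc_cpumask_var(), copied from the core-sibling map, iterated with for_each_cpu(), and freed on teardown. The following sketch mimics that lifecycle with a heap-allocated bitmap so the mask no longer bloats the struct; sizes and helper names are illustrative.

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS       1024
#define BITS_PER_LONG (8 * (int)sizeof(unsigned long))
#define MASK_LONGS    ((NR_CPUS + BITS_PER_LONG - 1) / BITS_PER_LONG)

/* Model of cpumask_var_t: the mask lives in a separate allocation instead of
 * being embedded in every struct threshold_bank. */
typedef unsigned long *cpumask_var;

static int alloc_cpumask(cpumask_var *mask)
{
    *mask = calloc(MASK_LONGS, sizeof(unsigned long));
    return *mask ? 0 : -1;
}

static void set_cpu(cpumask_var mask, int cpu)
{
    mask[cpu / BITS_PER_LONG] |= 1UL << (cpu % BITS_PER_LONG);
}

static int test_cpu(const unsigned long *mask, int cpu)
{
    return (mask[cpu / BITS_PER_LONG] >> (cpu % BITS_PER_LONG)) & 1;
}

struct threshold_bank {
    cpumask_var cpus;    /* was: cpumask_t cpus, inline and NR_CPUS bits wide */
};

int main(void)
{
    struct threshold_bank b;

    if (alloc_cpumask(&b.cpus) < 0)
        return 1;

    set_cpu(b.cpus, 2);              /* pretend cpus 2 and 3 share the bank */
    set_cpu(b.cpus, 3);

    for (int cpu = 0; cpu < 8; cpu++)
        if (test_cpu(b.cpus, cpu))
            printf("cpu %d shares this bank\n", cpu);

    free(b.cpus);                    /* free_cpumask_var() on teardown */
    return 0;
}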
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index f44c3662436..aaa7d973093 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -1,17 +1,21 @@
1/* 1/*
2 * Intel specific MCE features. 2 * Intel specific MCE features.
3 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> 3 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
4 * Copyright (C) 2008, 2009 Intel Corporation
5 * Author: Andi Kleen
4 */ 6 */
5 7
6#include <linux/init.h> 8#include <linux/init.h>
7#include <linux/interrupt.h> 9#include <linux/interrupt.h>
8#include <linux/percpu.h> 10#include <linux/percpu.h>
9#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/apic.h>
10#include <asm/msr.h> 13#include <asm/msr.h>
11#include <asm/mce.h> 14#include <asm/mce.h>
12#include <asm/hw_irq.h> 15#include <asm/hw_irq.h>
13#include <asm/idle.h> 16#include <asm/idle.h>
14#include <asm/therm_throt.h> 17#include <asm/therm_throt.h>
18#include <asm/apic.h>
15 19
16asmlinkage void smp_thermal_interrupt(void) 20asmlinkage void smp_thermal_interrupt(void)
17{ 21{
@@ -24,7 +28,7 @@ asmlinkage void smp_thermal_interrupt(void)
24 28
25 rdmsrl(MSR_IA32_THERM_STATUS, msr_val); 29 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
26 if (therm_throt_process(msr_val & 1)) 30 if (therm_throt_process(msr_val & 1))
27 mce_log_therm_throt_event(smp_processor_id(), msr_val); 31 mce_log_therm_throt_event(msr_val);
28 32
29 inc_irq_stat(irq_thermal_count); 33 inc_irq_stat(irq_thermal_count);
30 irq_exit(); 34 irq_exit();
@@ -48,13 +52,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
48 */ 52 */
49 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 53 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
50 h = apic_read(APIC_LVTTHMR); 54 h = apic_read(APIC_LVTTHMR);
51 if ((l & (1 << 3)) && (h & APIC_DM_SMI)) { 55 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
52 printk(KERN_DEBUG 56 printk(KERN_DEBUG
53 "CPU%d: Thermal monitoring handled by SMI\n", cpu); 57 "CPU%d: Thermal monitoring handled by SMI\n", cpu);
54 return; 58 return;
55 } 59 }
56 60
57 if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) 61 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
58 tm2 = 1; 62 tm2 = 1;
59 63
60 if (h & APIC_VECTOR_MASK) { 64 if (h & APIC_VECTOR_MASK) {
@@ -72,7 +76,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
72 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); 76 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
73 77
74 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 78 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
75 wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); 79 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
76 80
77 l = apic_read(APIC_LVTTHMR); 81 l = apic_read(APIC_LVTTHMR);
78 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 82 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
@@ -84,7 +88,209 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
84 return; 88 return;
85} 89}
86 90
91/*
92 * Support for Intel Correct Machine Check Interrupts. This allows
93 * the CPU to raise an interrupt when a corrected machine check happened.
94 * Normally we pick those up using a regular polling timer.
95 * Also supports reliable discovery of shared banks.
96 */
97
98static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
99
100/*
101 * cmci_discover_lock protects against parallel discovery attempts
102 * which could race against each other.
103 */
104static DEFINE_SPINLOCK(cmci_discover_lock);
105
106#define CMCI_THRESHOLD 1
107
108static int cmci_supported(int *banks)
109{
110 u64 cap;
111
112 /*
113 * Vendor check is not strictly needed, but the initial
114 * initialization is vendor keyed and this
115 * makes sure none of the backdoors are entered otherwise.
116 */
117 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
118 return 0;
119 if (!cpu_has_apic || lapic_get_maxlvt() < 6)
120 return 0;
121 rdmsrl(MSR_IA32_MCG_CAP, cap);
122 *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
123 return !!(cap & MCG_CMCI_P);
124}
125
126/*
127 * The interrupt handler. This is called on every event.
128 * Just call the poller directly to log any events.
129 * This could in theory increase the threshold under high load,
130 * but doesn't for now.
131 */
132static void intel_threshold_interrupt(void)
133{
134 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
135 mce_notify_user();
136}
137
138static void print_update(char *type, int *hdr, int num)
139{
140 if (*hdr == 0)
141 printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
142 *hdr = 1;
143 printk(KERN_CONT " %s:%d", type, num);
144}
145
146/*
147 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
148 * on this CPU. Use the algorithm recommended in the SDM to discover shared
149 * banks.
150 */
151static void cmci_discover(int banks, int boot)
152{
153 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
154 int hdr = 0;
155 int i;
156
157 spin_lock(&cmci_discover_lock);
158 for (i = 0; i < banks; i++) {
159 u64 val;
160
161 if (test_bit(i, owned))
162 continue;
163
164 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
165
166 /* Already owned by someone else? */
167 if (val & CMCI_EN) {
168 if (test_and_clear_bit(i, owned) || boot)
169 print_update("SHD", &hdr, i);
170 __clear_bit(i, __get_cpu_var(mce_poll_banks));
171 continue;
172 }
173
174 val |= CMCI_EN | CMCI_THRESHOLD;
175 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
176 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
177
178 /* Did the enable bit stick? -- the bank supports CMCI */
179 if (val & CMCI_EN) {
180 if (!test_and_set_bit(i, owned) || boot)
181 print_update("CMCI", &hdr, i);
182 __clear_bit(i, __get_cpu_var(mce_poll_banks));
183 } else {
184 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
185 }
186 }
187 spin_unlock(&cmci_discover_lock);
188 if (hdr)
189 printk(KERN_CONT "\n");
190}
191
192/*
193 * Just in case we missed an event during initialization check
194 * all the CMCI owned banks.
195 */
196void cmci_recheck(void)
197{
198 unsigned long flags;
199 int banks;
200
201 if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
202 return;
203 local_irq_save(flags);
204 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
205 local_irq_restore(flags);
206}
207
208/*
209 * Disable CMCI on this CPU for all banks it owns when it goes down.
210 * This allows other CPUs to claim the banks on rediscovery.
211 */
212void cmci_clear(void)
213{
214 int i;
215 int banks;
216 u64 val;
217
218 if (!cmci_supported(&banks))
219 return;
220 spin_lock(&cmci_discover_lock);
221 for (i = 0; i < banks; i++) {
222 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
223 continue;
224 /* Disable CMCI */
225 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
226 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
227 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
228 __clear_bit(i, __get_cpu_var(mce_banks_owned));
229 }
230 spin_unlock(&cmci_discover_lock);
231}
232
233/*
 234 * After a CPU went down, cycle through all the others and rediscover.
235 * Must run in process context.
236 */
237void cmci_rediscover(int dying)
238{
239 int banks;
240 int cpu;
241 cpumask_var_t old;
242
243 if (!cmci_supported(&banks))
244 return;
245 if (!alloc_cpumask_var(&old, GFP_KERNEL))
246 return;
247 cpumask_copy(old, &current->cpus_allowed);
248
249 for_each_online_cpu (cpu) {
250 if (cpu == dying)
251 continue;
252 if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)))
253 continue;
 254 /* Recheck banks in case CPUs don't all have the same number */
255 if (cmci_supported(&banks))
256 cmci_discover(banks, 0);
257 }
258
259 set_cpus_allowed_ptr(current, old);
260 free_cpumask_var(old);
261}
262
263/*
264 * Reenable CMCI on this CPU in case a CPU down failed.
265 */
266void cmci_reenable(void)
267{
268 int banks;
269 if (cmci_supported(&banks))
270 cmci_discover(banks, 0);
271}
272
273static __cpuinit void intel_init_cmci(void)
274{
275 int banks;
276
277 if (!cmci_supported(&banks))
278 return;
279
280 mce_threshold_vector = intel_threshold_interrupt;
281 cmci_discover(banks, 1);
282 /*
 283 * For CPU #0 this runs with the APIC still disabled, but that's
284 * ok because only the vector is set up. We still do another
285 * check for the banks later for CPU #0 just to make sure
286 * to not miss any events.
287 */
288 apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
289 cmci_recheck();
290}
291
87void mce_intel_feature_init(struct cpuinfo_x86 *c) 292void mce_intel_feature_init(struct cpuinfo_x86 *c)
88{ 293{
89 intel_init_thermal(c); 294 intel_init_thermal(c);
295 intel_init_cmci();
90} 296}
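cmci_discover() above claims banks by writing CMCI_EN into MCi_CTL2 and reading it back: a bit that is already set means another CPU owns the bank, and a bit that refuses to stick means the bank has no CMCI and stays with the polling timer. A toy model of that ownership protocol with two simulated CPUs and a shared register array (bank 2 is wired to reject the enable bit); the register layout and bit value are illustrative.

#include <stdint.h>
#include <stdio.h>

#define CMCI_EN (1ULL << 30)
#define NBANKS  4

/* Simulated MCi_CTL2 registers, shared by all "CPUs" like a socket-wide bank. */
static uint64_t ctl2[NBANKS];

/* Bank 2 pretends not to support CMCI: writes to its enable bit don't stick. */
static void write_ctl2(int bank, uint64_t val)
{
    if (bank == 2)
        val &= ~CMCI_EN;
    ctl2[bank] = val;
}

/* One CPU's discovery pass: claim every bank nobody else owns yet. */
static void discover(int cpu)
{
    for (int i = 0; i < NBANKS; i++) {
        uint64_t val = ctl2[i];

        if (val & CMCI_EN) {               /* already owned by another CPU */
            printf("cpu %d: bank %d shared\n", cpu, i);
            continue;
        }

        write_ctl2(i, val | CMCI_EN);      /* try to claim it              */
        if (ctl2[i] & CMCI_EN)             /* did the enable bit stick?    */
            printf("cpu %d: bank %d owned via CMCI\n", cpu, i);
        else
            printf("cpu %d: bank %d has no CMCI, keep polling\n", cpu, i);
    }
}

int main(void)
{
    discover(0);    /* first CPU claims banks 0, 1, 3  */
    discover(1);    /* second CPU sees them as shared  */
    return 0;
}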
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 9b60fce09f7..f53bdcbaf38 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -85,7 +85,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
          */
         rdmsr(MSR_IA32_MISC_ENABLE, l, h);
         h = apic_read(APIC_LVTTHMR);
-        if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+        if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
                 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
                         cpu);
                 return; /* -EBUSY */
@@ -111,7 +111,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
         vendor_thermal_interrupt = intel_thermal_interrupt;
 
         rdmsr(MSR_IA32_MISC_ENABLE, l, h);
-        wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+        wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
 
         l = apic_read(APIC_LVTTHMR);
         apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
new file mode 100644
index 00000000000..23ee9e730f7
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -0,0 +1,29 @@
+/*
+ * Common corrected MCE threshold handler code:
+ */
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+
+#include <asm/irq_vectors.h>
+#include <asm/apic.h>
+#include <asm/idle.h>
+#include <asm/mce.h>
+
+static void default_threshold_interrupt(void)
+{
+        printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
+                        THRESHOLD_APIC_VECTOR);
+}
+
+void (*mce_threshold_vector)(void) = default_threshold_interrupt;
+
+asmlinkage void mce_threshold_interrupt(void)
+{
+        exit_idle();
+        irq_enter();
+        inc_irq_stat(irq_threshold_count);
+        mce_threshold_vector();
+        irq_exit();
+        /* Ack only at the end to avoid potential reentry */
+        ack_APIC_irq();
+}
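The only moving part in this new file is a function pointer preloaded with a complaining default, which vendor code such as intel_init_cmci() above later repoints at its own handler. A minimal userspace sketch of that default-plus-override shape follows; the names are illustrative, not kernel symbols.

#include <stdio.h>

/* Default handler: complain loudly if nobody has registered a real one. */
static void default_threshold_handler(void)
{
        fprintf(stderr, "unexpected threshold event: no handler installed\n");
}

/* The hook starts out pointing at the default, like mce_threshold_vector. */
static void (*threshold_handler)(void) = default_threshold_handler;

static void vendor_threshold_handler(void)
{
        puts("vendor-specific threshold handling runs here");
}

int main(void)
{
        threshold_handler();                          /* falls through to the default */
        threshold_handler = vendor_threshold_handler; /* vendor init overrides it */
        threshold_handler();                          /* now dispatches to the vendor hook */
        return 0;
}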
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 9abd48b2267..f6c70a164e3 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -19,7 +19,7 @@
 #include <linux/nmi.h>
 #include <linux/kprobes.h>
 
-#include <asm/apic.h>
+#include <asm/genapic.h>
 #include <asm/intel_arch_perfmon.h>
 
 struct nmi_watchdog_ctlblk {
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 01b1244ef1c..d67e0e48bc2 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -7,11 +7,10 @@
 /*
  * Get CPU information for use by the procfs.
  */
-#ifdef CONFIG_X86_32
 static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
                               unsigned int cpu)
 {
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
         if (c->x86_max_cores * smp_num_siblings > 1) {
                 seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
                 seq_printf(m, "siblings\t: %d\n",
@@ -24,6 +23,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
 #endif
 }
 
+#ifdef CONFIG_X86_32
 static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 {
         /*
@@ -50,22 +50,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
                    c->wp_works_ok ? "yes" : "no");
 }
 #else
-static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
-                              unsigned int cpu)
-{
-#ifdef CONFIG_SMP
-        if (c->x86_max_cores * smp_num_siblings > 1) {
-                seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
-                seq_printf(m, "siblings\t: %d\n",
-                           cpus_weight(per_cpu(cpu_core_map, cpu)));
-                seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
-                seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
-                seq_printf(m, "apicid\t\t: %d\n", c->apicid);
-                seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
-        }
-#endif
-}
-
 static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 {
         seq_printf(m,