aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c56
-rw-r--r--arch/x86/kernel/cpu/amd.c56
-rw-r--r--arch/x86/kernel/cpu/centaur.c2
-rw-r--r--arch/x86/kernel/cpu/centaur_64.c2
-rw-r--r--arch/x86/kernel/cpu/common.c579
-rw-r--r--arch/x86/kernel/cpu/cpu.h11
-rwxr-xr-xarch/x86/kernel/cpu/cpu_debug.c839
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/e_powersaver.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c6
-rw-r--r--arch/x86/kernel/cpu/cyrix.c16
-rw-r--r--arch/x86/kernel/cpu/intel.c49
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c71
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_32.c14
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c530
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c43
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c214
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/threshold.c29
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c2
-rw-r--r--arch/x86/kernel/cpu/proc.c20
-rw-r--r--arch/x86/kernel/cpu/transmeta.c2
-rw-r--r--arch/x86/kernel/cpu/umc.c2
25 files changed, 2008 insertions, 550 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82db7f45e2d..d4356f8b752 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -14,6 +14,8 @@ obj-y += vmware.o hypervisor.o
14obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o 14obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
15obj-$(CONFIG_X86_64) += bugs_64.o 15obj-$(CONFIG_X86_64) += bugs_64.o
16 16
17obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o
18
17obj-$(CONFIG_CPU_SUP_INTEL) += intel.o 19obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
18obj-$(CONFIG_CPU_SUP_AMD) += amd.o 20obj-$(CONFIG_CPU_SUP_AMD) += amd.o
19obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o 21obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 2cf23634b6d..8220ae69849 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -7,7 +7,7 @@
7#include <asm/pat.h> 7#include <asm/pat.h>
8#include <asm/processor.h> 8#include <asm/processor.h>
9 9
10#include <mach_apic.h> 10#include <asm/apic.h>
11 11
12struct cpuid_bit { 12struct cpuid_bit {
13 u16 feature; 13 u16 feature;
@@ -29,7 +29,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
29 u32 regs[4]; 29 u32 regs[4];
30 const struct cpuid_bit *cb; 30 const struct cpuid_bit *cb;
31 31
32 static const struct cpuid_bit cpuid_bits[] = { 32 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, 33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
34 { 0, 0, 0, 0 } 34 { 0, 0, 0, 0 }
35 }; 35 };
@@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
69 */ 69 */
70void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) 70void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
71{ 71{
72#ifdef CONFIG_X86_SMP 72#ifdef CONFIG_SMP
73 unsigned int eax, ebx, ecx, edx, sub_index; 73 unsigned int eax, ebx, ecx, edx, sub_index;
74 unsigned int ht_mask_width, core_plus_mask_width; 74 unsigned int ht_mask_width, core_plus_mask_width;
75 unsigned int core_select_mask, core_level_siblings; 75 unsigned int core_select_mask, core_level_siblings;
@@ -116,22 +116,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
116 116
117 core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; 117 core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
118 118
119#ifdef CONFIG_X86_32 119 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
120 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
121 & core_select_mask; 120 & core_select_mask;
122 c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); 121 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
123 /* 122 /*
124 * Reinit the apicid, now that we have extended initial_apicid. 123 * Reinit the apicid, now that we have extended initial_apicid.
125 */ 124 */
126 c->apicid = phys_pkg_id(c->initial_apicid, 0); 125 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
127#else 126
128 c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
129 c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
130 /*
131 * Reinit the apicid, now that we have extended initial_apicid.
132 */
133 c->apicid = phys_pkg_id(0);
134#endif
135 c->x86_max_cores = (core_level_siblings / smp_num_siblings); 127 c->x86_max_cores = (core_level_siblings / smp_num_siblings);
136 128
137 129
@@ -143,37 +135,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
143 return; 135 return;
144#endif 136#endif
145} 137}
146
147#ifdef CONFIG_X86_PAT
148void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
149{
150 if (!cpu_has_pat)
151 pat_disable("PAT not supported by CPU.");
152
153 switch (c->x86_vendor) {
154 case X86_VENDOR_INTEL:
155 /*
156 * There is a known erratum on Pentium III and Core Solo
157 * and Core Duo CPUs.
158 * " Page with PAT set to WC while associated MTRR is UC
159 * may consolidate to UC "
160 * Because of this erratum, it is better to stick with
161 * setting WC in MTRR rather than using PAT on these CPUs.
162 *
163 * Enable PAT WC only on P4, Core 2 or later CPUs.
164 */
165 if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
166 return;
167
168 pat_disable("PAT WC disabled due to known CPU erratum.");
169 return;
170
171 case X86_VENDOR_AMD:
172 case X86_VENDOR_CENTAUR:
173 case X86_VENDOR_TRANSMETA:
174 return;
175 }
176
177 pat_disable("PAT disabled. Not yet verified on this CPU type.");
178}
179#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c878f6aa91..7e4a459daa6 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
5#include <asm/io.h> 5#include <asm/io.h>
6#include <asm/processor.h> 6#include <asm/processor.h>
7#include <asm/apic.h> 7#include <asm/apic.h>
8#include <asm/cpu.h>
8 9
9#ifdef CONFIG_X86_64 10#ifdef CONFIG_X86_64
10# include <asm/numa_64.h> 11# include <asm/numa_64.h>
@@ -12,8 +13,6 @@
12# include <asm/cacheflush.h> 13# include <asm/cacheflush.h>
13#endif 14#endif
14 15
15#include <mach_apic.h>
16
17#include "cpu.h" 16#include "cpu.h"
18 17
19#ifdef CONFIG_X86_32 18#ifdef CONFIG_X86_32
@@ -143,6 +142,55 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
143 } 142 }
144} 143}
145 144
145static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
146{
147#ifdef CONFIG_SMP
148 /* calling is from identify_secondary_cpu() ? */
149 if (c->cpu_index == boot_cpu_id)
150 return;
151
152 /*
153 * Certain Athlons might work (for various values of 'work') in SMP
154 * but they are not certified as MP capable.
155 */
156 /* Athlon 660/661 is valid. */
157 if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
158 (c->x86_mask == 1)))
159 goto valid_k7;
160
161 /* Duron 670 is valid */
162 if ((c->x86_model == 7) && (c->x86_mask == 0))
163 goto valid_k7;
164
165 /*
166 * Athlon 662, Duron 671, and Athlon >model 7 have capability
167 * bit. It's worth noting that the A5 stepping (662) of some
168 * Athlon XP's have the MP bit set.
169 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
170 * more.
171 */
172 if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
173 ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
174 (c->x86_model > 7))
175 if (cpu_has_mp)
176 goto valid_k7;
177
178 /* If we get here, not a certified SMP capable AMD system. */
179
180 /*
181 * Don't taint if we are running SMP kernel on a single non-MP
182 * approved Athlon
183 */
184 WARN_ONCE(1, "WARNING: This combination of AMD"
185 "processors is not suitable for SMP.\n");
186 if (!test_taint(TAINT_UNSAFE_SMP))
187 add_taint(TAINT_UNSAFE_SMP);
188
189valid_k7:
190 ;
191#endif
192}
193
146static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) 194static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
147{ 195{
148 u32 l, h; 196 u32 l, h;
@@ -177,6 +225,8 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
177 } 225 }
178 226
179 set_cpu_cap(c, X86_FEATURE_K7); 227 set_cpu_cap(c, X86_FEATURE_K7);
228
229 amd_k7_smp_check(c);
180} 230}
181#endif 231#endif
182 232
@@ -452,7 +502,7 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int
452} 502}
453#endif 503#endif
454 504
455static struct cpu_dev amd_cpu_dev __cpuinitdata = { 505static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
456 .c_vendor = "AMD", 506 .c_vendor = "AMD",
457 .c_ident = { "AuthenticAMD" }, 507 .c_ident = { "AuthenticAMD" },
458#ifdef CONFIG_X86_32 508#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 89bfdd9cacc..983e0830f0d 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -468,7 +468,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
468 return size; 468 return size;
469} 469}
470 470
471static struct cpu_dev centaur_cpu_dev __cpuinitdata = { 471static const struct cpu_dev __cpuinitconst centaur_cpu_dev = {
472 .c_vendor = "Centaur", 472 .c_vendor = "Centaur",
473 .c_ident = { "CentaurHauls" }, 473 .c_ident = { "CentaurHauls" },
474 .c_early_init = early_init_centaur, 474 .c_early_init = early_init_centaur,
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
index a1625f5a1e7..51b09c48c9c 100644
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -25,7 +25,7 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
25 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); 25 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
26} 26}
27 27
28static struct cpu_dev centaur_cpu_dev __cpuinitdata = { 28static const struct cpu_dev centaur_cpu_dev __cpuinitconst = {
29 .c_vendor = "Centaur", 29 .c_vendor = "Centaur",
30 .c_ident = { "CentaurHauls" }, 30 .c_ident = { "CentaurHauls" },
31 .c_early_init = early_init_centaur, 31 .c_early_init = early_init_centaur,
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 83492b1f93b..e2962cc1e27 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,118 +1,129 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
4#include <linux/string.h>
5#include <linux/bootmem.h> 1#include <linux/bootmem.h>
2#include <linux/linkage.h>
6#include <linux/bitops.h> 3#include <linux/bitops.h>
4#include <linux/kernel.h>
7#include <linux/module.h> 5#include <linux/module.h>
8#include <linux/kgdb.h> 6#include <linux/percpu.h>
9#include <linux/topology.h> 7#include <linux/string.h>
10#include <linux/delay.h> 8#include <linux/delay.h>
9#include <linux/sched.h>
10#include <linux/init.h>
11#include <linux/kgdb.h>
11#include <linux/smp.h> 12#include <linux/smp.h>
12#include <linux/percpu.h> 13#include <linux/io.h>
13#include <asm/i387.h> 14
14#include <asm/msr.h> 15#include <asm/stackprotector.h>
15#include <asm/io.h>
16#include <asm/linkage.h>
17#include <asm/mmu_context.h> 16#include <asm/mmu_context.h>
17#include <asm/hypervisor.h>
18#include <asm/processor.h>
19#include <asm/sections.h>
20#include <asm/topology.h>
21#include <asm/cpumask.h>
22#include <asm/pgtable.h>
23#include <asm/atomic.h>
24#include <asm/proto.h>
25#include <asm/setup.h>
26#include <asm/apic.h>
27#include <asm/desc.h>
28#include <asm/i387.h>
18#include <asm/mtrr.h> 29#include <asm/mtrr.h>
30#include <asm/numa.h>
31#include <asm/asm.h>
32#include <asm/cpu.h>
19#include <asm/mce.h> 33#include <asm/mce.h>
34#include <asm/msr.h>
20#include <asm/pat.h> 35#include <asm/pat.h>
21#include <asm/asm.h>
22#include <asm/numa.h>
23#include <asm/smp.h> 36#include <asm/smp.h>
37
24#ifdef CONFIG_X86_LOCAL_APIC 38#ifdef CONFIG_X86_LOCAL_APIC
25#include <asm/mpspec.h> 39#include <asm/uv/uv.h>
26#include <asm/apic.h>
27#include <mach_apic.h>
28#include <asm/genapic.h>
29#endif 40#endif
30 41
31#include <asm/pda.h>
32#include <asm/pgtable.h>
33#include <asm/processor.h>
34#include <asm/desc.h>
35#include <asm/atomic.h>
36#include <asm/proto.h>
37#include <asm/sections.h>
38#include <asm/setup.h>
39#include <asm/hypervisor.h>
40
41#include "cpu.h" 42#include "cpu.h"
42 43
43#ifdef CONFIG_X86_64 44#ifdef CONFIG_X86_64
44 45
45/* all of these masks are initialized in setup_cpu_local_masks() */ 46/* all of these masks are initialized in setup_cpu_local_masks() */
46cpumask_var_t cpu_callin_mask;
47cpumask_var_t cpu_callout_mask;
48cpumask_var_t cpu_initialized_mask; 47cpumask_var_t cpu_initialized_mask;
48cpumask_var_t cpu_callout_mask;
49cpumask_var_t cpu_callin_mask;
49 50
50/* representing cpus for which sibling maps can be computed */ 51/* representing cpus for which sibling maps can be computed */
51cpumask_var_t cpu_sibling_setup_mask; 52cpumask_var_t cpu_sibling_setup_mask;
52 53
54/* correctly size the local cpu masks */
55void __init setup_cpu_local_masks(void)
56{
57 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
58 alloc_bootmem_cpumask_var(&cpu_callin_mask);
59 alloc_bootmem_cpumask_var(&cpu_callout_mask);
60 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
61}
62
53#else /* CONFIG_X86_32 */ 63#else /* CONFIG_X86_32 */
54 64
55cpumask_t cpu_callin_map; 65cpumask_t cpu_sibling_setup_map;
56cpumask_t cpu_callout_map; 66cpumask_t cpu_callout_map;
57cpumask_t cpu_initialized; 67cpumask_t cpu_initialized;
58cpumask_t cpu_sibling_setup_map; 68cpumask_t cpu_callin_map;
59 69
60#endif /* CONFIG_X86_32 */ 70#endif /* CONFIG_X86_32 */
61 71
62 72
63static struct cpu_dev *this_cpu __cpuinitdata; 73static const struct cpu_dev *this_cpu __cpuinitdata;
64 74
75DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 76#ifdef CONFIG_X86_64
66/* We need valid kernel segments for data and code in long mode too 77 /*
67 * IRET will check the segment types kkeil 2000/10/28 78 * We need valid kernel segments for data and code in long mode too
68 * Also sysret mandates a special GDT layout 79 * IRET will check the segment types kkeil 2000/10/28
69 */ 80 * Also sysret mandates a special GDT layout
70/* The TLS descriptors are currently at a different place compared to i386. 81 *
71 Hopefully nobody expects them at a fixed place (Wine?) */ 82 * TLS descriptors are currently at a different place compared to i386.
72DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { 83 * Hopefully nobody expects them at a fixed place (Wine?)
73 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, 84 */
74 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, 85 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
75 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, 86 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
76 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, 87 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
77 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, 88 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
78 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, 89 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
79} }; 90 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
80#else 91#else
81DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 92 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
82 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 93 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
83 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 94 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
84 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 95 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
85 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
86 /* 96 /*
87 * Segments used for calling PnP BIOS have byte granularity. 97 * Segments used for calling PnP BIOS have byte granularity.
88 * They code segments and data segments have fixed 64k limits, 98 * They code segments and data segments have fixed 64k limits,
89 * the transfer segment sizes are set at run time. 99 * the transfer segment sizes are set at run time.
90 */ 100 */
91 /* 32-bit code */ 101 /* 32-bit code */
92 [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, 102 [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
93 /* 16-bit code */ 103 /* 16-bit code */
94 [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, 104 [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
95 /* 16-bit data */ 105 /* 16-bit data */
96 [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, 106 [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
97 /* 16-bit data */ 107 /* 16-bit data */
98 [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, 108 [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
99 /* 16-bit data */ 109 /* 16-bit data */
100 [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, 110 [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
101 /* 111 /*
102 * The APM segments have byte granularity and their bases 112 * The APM segments have byte granularity and their bases
103 * are set at run time. All have 64k limits. 113 * are set at run time. All have 64k limits.
104 */ 114 */
105 /* 32-bit code */ 115 /* 32-bit code */
106 [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, 116 [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
107 /* 16-bit code */ 117 /* 16-bit code */
108 [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, 118 [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
109 /* data */ 119 /* data */
110 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 120 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
111 121
112 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 122 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
113 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, 123 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
114} }; 124 GDT_STACK_CANARY_INIT
115#endif 125#endif
126} };
116EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 127EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
117 128
118#ifdef CONFIG_X86_32 129#ifdef CONFIG_X86_32
@@ -153,16 +164,17 @@ static inline int flag_is_changeable_p(u32 flag)
153 * the CPUID. Add "volatile" to not allow gcc to 164 * the CPUID. Add "volatile" to not allow gcc to
154 * optimize the subsequent calls to this function. 165 * optimize the subsequent calls to this function.
155 */ 166 */
156 asm volatile ("pushfl\n\t" 167 asm volatile ("pushfl \n\t"
157 "pushfl\n\t" 168 "pushfl \n\t"
158 "popl %0\n\t" 169 "popl %0 \n\t"
159 "movl %0,%1\n\t" 170 "movl %0, %1 \n\t"
160 "xorl %2,%0\n\t" 171 "xorl %2, %0 \n\t"
161 "pushl %0\n\t" 172 "pushl %0 \n\t"
162 "popfl\n\t" 173 "popfl \n\t"
163 "pushfl\n\t" 174 "pushfl \n\t"
164 "popl %0\n\t" 175 "popl %0 \n\t"
165 "popfl\n\t" 176 "popfl \n\t"
177
166 : "=&r" (f1), "=&r" (f2) 178 : "=&r" (f1), "=&r" (f2)
167 : "ir" (flag)); 179 : "ir" (flag));
168 180
@@ -177,18 +189,22 @@ static int __cpuinit have_cpuid_p(void)
177 189
178static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 190static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
179{ 191{
180 if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { 192 unsigned long lo, hi;
181 /* Disable processor serial number */ 193
182 unsigned long lo, hi; 194 if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr)
183 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); 195 return;
184 lo |= 0x200000; 196
185 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); 197 /* Disable processor serial number: */
186 printk(KERN_NOTICE "CPU serial number disabled.\n"); 198
187 clear_cpu_cap(c, X86_FEATURE_PN); 199 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
188 200 lo |= 0x200000;
189 /* Disabling the serial number may affect the cpuid level */ 201 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
190 c->cpuid_level = cpuid_eax(0); 202
191 } 203 printk(KERN_NOTICE "CPU serial number disabled.\n");
204 clear_cpu_cap(c, X86_FEATURE_PN);
205
206 /* Disabling the serial number may affect the cpuid level */
207 c->cpuid_level = cpuid_eax(0);
192} 208}
193 209
194static int __init x86_serial_nr_setup(char *s) 210static int __init x86_serial_nr_setup(char *s)
@@ -213,16 +229,64 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
213#endif 229#endif
214 230
215/* 231/*
232 * Some CPU features depend on higher CPUID levels, which may not always
233 * be available due to CPUID level capping or broken virtualization
234 * software. Add those features to this table to auto-disable them.
235 */
236struct cpuid_dependent_feature {
237 u32 feature;
238 u32 level;
239};
240
241static const struct cpuid_dependent_feature __cpuinitconst
242cpuid_dependent_features[] = {
243 { X86_FEATURE_MWAIT, 0x00000005 },
244 { X86_FEATURE_DCA, 0x00000009 },
245 { X86_FEATURE_XSAVE, 0x0000000d },
246 { 0, 0 }
247};
248
249static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
250{
251 const struct cpuid_dependent_feature *df;
252
253 for (df = cpuid_dependent_features; df->feature; df++) {
254
255 if (!cpu_has(c, df->feature))
256 continue;
257 /*
258 * Note: cpuid_level is set to -1 if unavailable, but
259 * extended_extended_level is set to 0 if unavailable
260 * and the legitimate extended levels are all negative
261 * when signed; hence the weird messing around with
262 * signs here...
263 */
264 if (!((s32)df->level < 0 ?
265 (u32)df->level > (u32)c->extended_cpuid_level :
266 (s32)df->level > (s32)c->cpuid_level))
267 continue;
268
269 clear_cpu_cap(c, df->feature);
270 if (!warn)
271 continue;
272
273 printk(KERN_WARNING
274 "CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
275 x86_cap_flags[df->feature], df->level);
276 }
277}
278
279/*
216 * Naming convention should be: <Name> [(<Codename>)] 280 * Naming convention should be: <Name> [(<Codename>)]
217 * This table only is used unless init_<vendor>() below doesn't set it; 281 * This table only is used unless init_<vendor>() below doesn't set it;
218 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used 282 * in particular, if CPUID levels 0x80000002..4 are supported, this
219 * 283 * isn't used
220 */ 284 */
221 285
222/* Look up CPU names by table lookup. */ 286/* Look up CPU names by table lookup. */
223static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) 287static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
224{ 288{
225 struct cpu_model_info *info; 289 const struct cpu_model_info *info;
226 290
227 if (c->x86_model >= 16) 291 if (c->x86_model >= 16)
228 return NULL; /* Range check */ 292 return NULL; /* Range check */
@@ -242,21 +306,34 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
242 306
243__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; 307__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
244 308
245/* Current gdt points %fs at the "master" per-cpu area: after this, 309void load_percpu_segment(int cpu)
246 * it's on the real one. */ 310{
247void switch_to_new_gdt(void) 311#ifdef CONFIG_X86_32
312 loadsegment(fs, __KERNEL_PERCPU);
313#else
314 loadsegment(gs, 0);
315 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
316#endif
317 load_stack_canary_segment();
318}
319
320/*
321 * Current gdt points %fs at the "master" per-cpu area: after this,
322 * it's on the real one.
323 */
324void switch_to_new_gdt(int cpu)
248{ 325{
249 struct desc_ptr gdt_descr; 326 struct desc_ptr gdt_descr;
250 327
251 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); 328 gdt_descr.address = (long)get_cpu_gdt_table(cpu);
252 gdt_descr.size = GDT_SIZE - 1; 329 gdt_descr.size = GDT_SIZE - 1;
253 load_gdt(&gdt_descr); 330 load_gdt(&gdt_descr);
254#ifdef CONFIG_X86_32 331 /* Reload the per-cpu base */
255 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); 332
256#endif 333 load_percpu_segment(cpu);
257} 334}
258 335
259static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; 336static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
260 337
261static void __cpuinit default_init(struct cpuinfo_x86 *c) 338static void __cpuinit default_init(struct cpuinfo_x86 *c)
262{ 339{
@@ -275,7 +352,7 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c)
275#endif 352#endif
276} 353}
277 354
278static struct cpu_dev __cpuinitdata default_cpu = { 355static const struct cpu_dev __cpuinitconst default_cpu = {
279 .c_init = default_init, 356 .c_init = default_init,
280 .c_vendor = "Unknown", 357 .c_vendor = "Unknown",
281 .c_x86_vendor = X86_VENDOR_UNKNOWN, 358 .c_x86_vendor = X86_VENDOR_UNKNOWN,
@@ -289,22 +366,24 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
289 if (c->extended_cpuid_level < 0x80000004) 366 if (c->extended_cpuid_level < 0x80000004)
290 return; 367 return;
291 368
292 v = (unsigned int *) c->x86_model_id; 369 v = (unsigned int *)c->x86_model_id;
293 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); 370 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
294 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); 371 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
295 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); 372 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
296 c->x86_model_id[48] = 0; 373 c->x86_model_id[48] = 0;
297 374
298 /* Intel chips right-justify this string for some dumb reason; 375 /*
299 undo that brain damage */ 376 * Intel chips right-justify this string for some dumb reason;
377 * undo that brain damage:
378 */
300 p = q = &c->x86_model_id[0]; 379 p = q = &c->x86_model_id[0];
301 while (*p == ' ') 380 while (*p == ' ')
302 p++; 381 p++;
303 if (p != q) { 382 if (p != q) {
304 while (*p) 383 while (*p)
305 *q++ = *p++; 384 *q++ = *p++;
306 while (q <= &c->x86_model_id[48]) 385 while (q <= &c->x86_model_id[48])
307 *q++ = '\0'; /* Zero-pad the rest */ 386 *q++ = '\0'; /* Zero-pad the rest */
308 } 387 }
309} 388}
310 389
@@ -373,36 +452,30 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
373 452
374 if (smp_num_siblings == 1) { 453 if (smp_num_siblings == 1) {
375 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 454 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
376 } else if (smp_num_siblings > 1) { 455 goto out;
456 }
377 457
378 if (smp_num_siblings > nr_cpu_ids) { 458 if (smp_num_siblings <= 1)
379 printk(KERN_WARNING "CPU: Unsupported number of siblings %d", 459 goto out;
380 smp_num_siblings);
381 smp_num_siblings = 1;
382 return;
383 }
384 460
385 index_msb = get_count_order(smp_num_siblings); 461 if (smp_num_siblings > nr_cpu_ids) {
386#ifdef CONFIG_X86_64 462 pr_warning("CPU: Unsupported number of siblings %d",
387 c->phys_proc_id = phys_pkg_id(index_msb); 463 smp_num_siblings);
388#else 464 smp_num_siblings = 1;
389 c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); 465 return;
390#endif 466 }
391 467
392 smp_num_siblings = smp_num_siblings / c->x86_max_cores; 468 index_msb = get_count_order(smp_num_siblings);
469 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
393 470
394 index_msb = get_count_order(smp_num_siblings); 471 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
395 472
396 core_bits = get_count_order(c->x86_max_cores); 473 index_msb = get_count_order(smp_num_siblings);
397 474
398#ifdef CONFIG_X86_64 475 core_bits = get_count_order(c->x86_max_cores);
399 c->cpu_core_id = phys_pkg_id(index_msb) & 476
400 ((1 << core_bits) - 1); 477 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
401#else 478 ((1 << core_bits) - 1);
402 c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
403 ((1 << core_bits) - 1);
404#endif
405 }
406 479
407out: 480out:
408 if ((c->x86_max_cores * smp_num_siblings) > 1) { 481 if ((c->x86_max_cores * smp_num_siblings) > 1) {
@@ -417,8 +490,8 @@ out:
417static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) 490static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
418{ 491{
419 char *v = c->x86_vendor_id; 492 char *v = c->x86_vendor_id;
420 int i;
421 static int printed; 493 static int printed;
494 int i;
422 495
423 for (i = 0; i < X86_VENDOR_NUM; i++) { 496 for (i = 0; i < X86_VENDOR_NUM; i++) {
424 if (!cpu_devs[i]) 497 if (!cpu_devs[i])
@@ -427,6 +500,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
427 if (!strcmp(v, cpu_devs[i]->c_ident[0]) || 500 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
428 (cpu_devs[i]->c_ident[1] && 501 (cpu_devs[i]->c_ident[1] &&
429 !strcmp(v, cpu_devs[i]->c_ident[1]))) { 502 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
503
430 this_cpu = cpu_devs[i]; 504 this_cpu = cpu_devs[i];
431 c->x86_vendor = this_cpu->c_x86_vendor; 505 c->x86_vendor = this_cpu->c_x86_vendor;
432 return; 506 return;
@@ -435,7 +509,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
435 509
436 if (!printed) { 510 if (!printed) {
437 printed++; 511 printed++;
438 printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); 512 printk(KERN_ERR
513 "CPU: vendor_id '%s' unknown, using generic init.\n", v);
514
439 printk(KERN_ERR "CPU: Your system may be unstable.\n"); 515 printk(KERN_ERR "CPU: Your system may be unstable.\n");
440 } 516 }
441 517
@@ -455,14 +531,17 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
455 /* Intel-defined flags: level 0x00000001 */ 531 /* Intel-defined flags: level 0x00000001 */
456 if (c->cpuid_level >= 0x00000001) { 532 if (c->cpuid_level >= 0x00000001) {
457 u32 junk, tfms, cap0, misc; 533 u32 junk, tfms, cap0, misc;
534
458 cpuid(0x00000001, &tfms, &misc, &junk, &cap0); 535 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
459 c->x86 = (tfms >> 8) & 0xf; 536 c->x86 = (tfms >> 8) & 0xf;
460 c->x86_model = (tfms >> 4) & 0xf; 537 c->x86_model = (tfms >> 4) & 0xf;
461 c->x86_mask = tfms & 0xf; 538 c->x86_mask = tfms & 0xf;
539
462 if (c->x86 == 0xf) 540 if (c->x86 == 0xf)
463 c->x86 += (tfms >> 20) & 0xff; 541 c->x86 += (tfms >> 20) & 0xff;
464 if (c->x86 >= 0x6) 542 if (c->x86 >= 0x6)
465 c->x86_model += ((tfms >> 16) & 0xf) << 4; 543 c->x86_model += ((tfms >> 16) & 0xf) << 4;
544
466 if (cap0 & (1<<19)) { 545 if (cap0 & (1<<19)) {
467 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 546 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
468 c->x86_cache_alignment = c->x86_clflush_size; 547 c->x86_cache_alignment = c->x86_clflush_size;
@@ -478,6 +557,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
478 /* Intel-defined flags: level 0x00000001 */ 557 /* Intel-defined flags: level 0x00000001 */
479 if (c->cpuid_level >= 0x00000001) { 558 if (c->cpuid_level >= 0x00000001) {
480 u32 capability, excap; 559 u32 capability, excap;
560
481 cpuid(0x00000001, &tfms, &ebx, &excap, &capability); 561 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
482 c->x86_capability[0] = capability; 562 c->x86_capability[0] = capability;
483 c->x86_capability[4] = excap; 563 c->x86_capability[4] = excap;
@@ -486,6 +566,7 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
486 /* AMD-defined flags: level 0x80000001 */ 566 /* AMD-defined flags: level 0x80000001 */
487 xlvl = cpuid_eax(0x80000000); 567 xlvl = cpuid_eax(0x80000000);
488 c->extended_cpuid_level = xlvl; 568 c->extended_cpuid_level = xlvl;
569
489 if ((xlvl & 0xffff0000) == 0x80000000) { 570 if ((xlvl & 0xffff0000) == 0x80000000) {
490 if (xlvl >= 0x80000001) { 571 if (xlvl >= 0x80000001) {
491 c->x86_capability[1] = cpuid_edx(0x80000001); 572 c->x86_capability[1] = cpuid_edx(0x80000001);
@@ -493,13 +574,15 @@ static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
493 } 574 }
494 } 575 }
495 576
496#ifdef CONFIG_X86_64
497 if (c->extended_cpuid_level >= 0x80000008) { 577 if (c->extended_cpuid_level >= 0x80000008) {
498 u32 eax = cpuid_eax(0x80000008); 578 u32 eax = cpuid_eax(0x80000008);
499 579
500 c->x86_virt_bits = (eax >> 8) & 0xff; 580 c->x86_virt_bits = (eax >> 8) & 0xff;
501 c->x86_phys_bits = eax & 0xff; 581 c->x86_phys_bits = eax & 0xff;
502 } 582 }
583#ifdef CONFIG_X86_32
584 else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
585 c->x86_phys_bits = 36;
503#endif 586#endif
504 587
505 if (c->extended_cpuid_level >= 0x80000007) 588 if (c->extended_cpuid_level >= 0x80000007)
@@ -546,8 +629,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
546{ 629{
547#ifdef CONFIG_X86_64 630#ifdef CONFIG_X86_64
548 c->x86_clflush_size = 64; 631 c->x86_clflush_size = 64;
632 c->x86_phys_bits = 36;
633 c->x86_virt_bits = 48;
549#else 634#else
550 c->x86_clflush_size = 32; 635 c->x86_clflush_size = 32;
636 c->x86_phys_bits = 32;
637 c->x86_virt_bits = 32;
551#endif 638#endif
552 c->x86_cache_alignment = c->x86_clflush_size; 639 c->x86_cache_alignment = c->x86_clflush_size;
553 640
@@ -570,21 +657,20 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
570 if (this_cpu->c_early_init) 657 if (this_cpu->c_early_init)
571 this_cpu->c_early_init(c); 658 this_cpu->c_early_init(c);
572 659
573 validate_pat_support(c);
574
575#ifdef CONFIG_SMP 660#ifdef CONFIG_SMP
576 c->cpu_index = boot_cpu_id; 661 c->cpu_index = boot_cpu_id;
577#endif 662#endif
663 filter_cpuid_features(c, false);
578} 664}
579 665
580void __init early_cpu_init(void) 666void __init early_cpu_init(void)
581{ 667{
582 struct cpu_dev **cdev; 668 const struct cpu_dev *const *cdev;
583 int count = 0; 669 int count = 0;
584 670
585 printk("KERNEL supported cpus:\n"); 671 printk(KERN_INFO "KERNEL supported cpus:\n");
586 for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { 672 for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
587 struct cpu_dev *cpudev = *cdev; 673 const struct cpu_dev *cpudev = *cdev;
588 unsigned int j; 674 unsigned int j;
589 675
590 if (count >= X86_VENDOR_NUM) 676 if (count >= X86_VENDOR_NUM)
@@ -595,7 +681,7 @@ void __init early_cpu_init(void)
595 for (j = 0; j < 2; j++) { 681 for (j = 0; j < 2; j++) {
596 if (!cpudev->c_ident[j]) 682 if (!cpudev->c_ident[j])
597 continue; 683 continue;
598 printk(" %s %s\n", cpudev->c_vendor, 684 printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
599 cpudev->c_ident[j]); 685 cpudev->c_ident[j]);
600 } 686 }
601 } 687 }
@@ -637,7 +723,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
637 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; 723 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
638#ifdef CONFIG_X86_32 724#ifdef CONFIG_X86_32
639# ifdef CONFIG_X86_HT 725# ifdef CONFIG_X86_HT
640 c->apicid = phys_pkg_id(c->initial_apicid, 0); 726 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
641# else 727# else
642 c->apicid = c->initial_apicid; 728 c->apicid = c->initial_apicid;
643# endif 729# endif
@@ -671,9 +757,13 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
671 c->x86_coreid_bits = 0; 757 c->x86_coreid_bits = 0;
672#ifdef CONFIG_X86_64 758#ifdef CONFIG_X86_64
673 c->x86_clflush_size = 64; 759 c->x86_clflush_size = 64;
760 c->x86_phys_bits = 36;
761 c->x86_virt_bits = 48;
674#else 762#else
675 c->cpuid_level = -1; /* CPUID not detected */ 763 c->cpuid_level = -1; /* CPUID not detected */
676 c->x86_clflush_size = 32; 764 c->x86_clflush_size = 32;
765 c->x86_phys_bits = 32;
766 c->x86_virt_bits = 32;
677#endif 767#endif
678 c->x86_cache_alignment = c->x86_clflush_size; 768 c->x86_cache_alignment = c->x86_clflush_size;
679 memset(&c->x86_capability, 0, sizeof c->x86_capability); 769 memset(&c->x86_capability, 0, sizeof c->x86_capability);
@@ -684,7 +774,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
684 this_cpu->c_identify(c); 774 this_cpu->c_identify(c);
685 775
686#ifdef CONFIG_X86_64 776#ifdef CONFIG_X86_64
687 c->apicid = phys_pkg_id(0); 777 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
688#endif 778#endif
689 779
690 /* 780 /*
@@ -704,13 +794,16 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
704 squash_the_stupid_serial_number(c); 794 squash_the_stupid_serial_number(c);
705 795
706 /* 796 /*
707 * The vendor-specific functions might have changed features. Now 797 * The vendor-specific functions might have changed features.
708 * we do "generic changes." 798 * Now we do "generic changes."
709 */ 799 */
710 800
801 /* Filter out anything that depends on CPUID levels we don't have */
802 filter_cpuid_features(c, true);
803
711 /* If the model name is still unset, do table lookup. */ 804 /* If the model name is still unset, do table lookup. */
712 if (!c->x86_model_id[0]) { 805 if (!c->x86_model_id[0]) {
713 char *p; 806 const char *p;
714 p = table_lookup_model(c); 807 p = table_lookup_model(c);
715 if (p) 808 if (p)
716 strcpy(c->x86_model_id, p); 809 strcpy(c->x86_model_id, p);
@@ -785,11 +878,11 @@ void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
785} 878}
786 879
787struct msr_range { 880struct msr_range {
788 unsigned min; 881 unsigned min;
789 unsigned max; 882 unsigned max;
790}; 883};
791 884
792static struct msr_range msr_range_array[] __cpuinitdata = { 885static const struct msr_range msr_range_array[] __cpuinitconst = {
793 { 0x00000000, 0x00000418}, 886 { 0x00000000, 0x00000418},
794 { 0xc0000000, 0xc000040b}, 887 { 0xc0000000, 0xc000040b},
795 { 0xc0010000, 0xc0010142}, 888 { 0xc0010000, 0xc0010142},
@@ -798,14 +891,15 @@ static struct msr_range msr_range_array[] __cpuinitdata = {
798 891
799static void __cpuinit print_cpu_msr(void) 892static void __cpuinit print_cpu_msr(void)
800{ 893{
894 unsigned index_min, index_max;
801 unsigned index; 895 unsigned index;
802 u64 val; 896 u64 val;
803 int i; 897 int i;
804 unsigned index_min, index_max;
805 898
806 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { 899 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
807 index_min = msr_range_array[i].min; 900 index_min = msr_range_array[i].min;
808 index_max = msr_range_array[i].max; 901 index_max = msr_range_array[i].max;
902
809 for (index = index_min; index < index_max; index++) { 903 for (index = index_min; index < index_max; index++) {
810 if (rdmsrl_amd_safe(index, &val)) 904 if (rdmsrl_amd_safe(index, &val))
811 continue; 905 continue;
@@ -815,6 +909,7 @@ static void __cpuinit print_cpu_msr(void)
815} 909}
816 910
817static int show_msr __cpuinitdata; 911static int show_msr __cpuinitdata;
912
818static __init int setup_show_msr(char *arg) 913static __init int setup_show_msr(char *arg)
819{ 914{
820 int num; 915 int num;
@@ -836,12 +931,14 @@ __setup("noclflush", setup_noclflush);
836 931
837void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) 932void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
838{ 933{
839 char *vendor = NULL; 934 const char *vendor = NULL;
840 935
841 if (c->x86_vendor < X86_VENDOR_NUM) 936 if (c->x86_vendor < X86_VENDOR_NUM) {
842 vendor = this_cpu->c_vendor; 937 vendor = this_cpu->c_vendor;
843 else if (c->cpuid_level >= 0) 938 } else {
844 vendor = c->x86_vendor_id; 939 if (c->cpuid_level >= 0)
940 vendor = c->x86_vendor_id;
941 }
845 942
846 if (vendor && !strstr(c->x86_model_id, vendor)) 943 if (vendor && !strstr(c->x86_model_id, vendor))
847 printk(KERN_CONT "%s ", vendor); 944 printk(KERN_CONT "%s ", vendor);
@@ -868,65 +965,45 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
868static __init int setup_disablecpuid(char *arg) 965static __init int setup_disablecpuid(char *arg)
869{ 966{
870 int bit; 967 int bit;
968
871 if (get_option(&arg, &bit) && bit < NCAPINTS*32) 969 if (get_option(&arg, &bit) && bit < NCAPINTS*32)
872 setup_clear_cpu_cap(bit); 970 setup_clear_cpu_cap(bit);
873 else 971 else
874 return 0; 972 return 0;
973
875 return 1; 974 return 1;
876} 975}
877__setup("clearcpuid=", setup_disablecpuid); 976__setup("clearcpuid=", setup_disablecpuid);
878 977
879#ifdef CONFIG_X86_64 978#ifdef CONFIG_X86_64
880struct x8664_pda **_cpu_pda __read_mostly;
881EXPORT_SYMBOL(_cpu_pda);
882
883struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 979struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
884 980
885static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; 981DEFINE_PER_CPU_FIRST(union irq_stack_union,
982 irq_stack_union) __aligned(PAGE_SIZE);
886 983
887void __cpuinit pda_init(int cpu) 984DEFINE_PER_CPU(char *, irq_stack_ptr) =
888{ 985 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
889 struct x8664_pda *pda = cpu_pda(cpu);
890 986
891 /* Setup up data that may be needed in __get_free_pages early */ 987DEFINE_PER_CPU(unsigned long, kernel_stack) =
892 loadsegment(fs, 0); 988 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
893 loadsegment(gs, 0); 989EXPORT_PER_CPU_SYMBOL(kernel_stack);
894 /* Memory clobbers used to order PDA accessed */
895 mb();
896 wrmsrl(MSR_GS_BASE, pda);
897 mb();
898
899 pda->cpunumber = cpu;
900 pda->irqcount = -1;
901 pda->kernelstack = (unsigned long)stack_thread_info() -
902 PDA_STACKOFFSET + THREAD_SIZE;
903 pda->active_mm = &init_mm;
904 pda->mmu_state = 0;
905
906 if (cpu == 0) {
907 /* others are initialized in smpboot.c */
908 pda->pcurrent = &init_task;
909 pda->irqstackptr = boot_cpu_stack;
910 pda->irqstackptr += IRQSTACKSIZE - 64;
911 } else {
912 if (!pda->irqstackptr) {
913 pda->irqstackptr = (char *)
914 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
915 if (!pda->irqstackptr)
916 panic("cannot allocate irqstack for cpu %d",
917 cpu);
918 pda->irqstackptr += IRQSTACKSIZE - 64;
919 }
920 990
921 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) 991DEFINE_PER_CPU(unsigned int, irq_count) = -1;
922 pda->nodenumber = cpu_to_node(cpu);
923 }
924}
925 992
926static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + 993/*
927 DEBUG_STKSZ] __page_aligned_bss; 994 * Special IST stacks which the CPU switches to when it calls
995 * an IST-marked descriptor entry. Up to 7 stacks (hardware
996 * limit), all of them are 4K, except the debug stack which
997 * is 8K.
998 */
999static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
1000 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1001 [DEBUG_STACK - 1] = DEBUG_STKSZ
1002};
928 1003
929extern asmlinkage void ignore_sysret(void); 1004static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
1005 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
1006 __aligned(PAGE_SIZE);
930 1007
931/* May not be marked __init: used by software suspend */ 1008/* May not be marked __init: used by software suspend */
932void syscall_init(void) 1009void syscall_init(void)
@@ -957,16 +1034,38 @@ unsigned long kernel_eflags;
957 */ 1034 */
958DEFINE_PER_CPU(struct orig_ist, orig_ist); 1035DEFINE_PER_CPU(struct orig_ist, orig_ist);
959 1036
960#else 1037#else /* CONFIG_X86_64 */
1038
1039#ifdef CONFIG_CC_STACKPROTECTOR
1040DEFINE_PER_CPU(unsigned long, stack_canary);
1041#endif
961 1042
962/* Make sure %fs is initialized properly in idle threads */ 1043/* Make sure %fs and %gs are initialized properly in idle threads */
963struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) 1044struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
964{ 1045{
965 memset(regs, 0, sizeof(struct pt_regs)); 1046 memset(regs, 0, sizeof(struct pt_regs));
966 regs->fs = __KERNEL_PERCPU; 1047 regs->fs = __KERNEL_PERCPU;
1048 regs->gs = __KERNEL_STACK_CANARY;
1049
967 return regs; 1050 return regs;
968} 1051}
969#endif 1052#endif /* CONFIG_X86_64 */
1053
1054/*
1055 * Clear all 6 debug registers:
1056 */
1057static void clear_all_debug_regs(void)
1058{
1059 int i;
1060
1061 for (i = 0; i < 8; i++) {
1062 /* Ignore db4, db5 */
1063 if ((i == 4) || (i == 5))
1064 continue;
1065
1066 set_debugreg(0, i);
1067 }
1068}
970 1069
971/* 1070/*
972 * cpu_init() initializes state that is per-CPU. Some data is already 1071 * cpu_init() initializes state that is per-CPU. Some data is already
@@ -976,21 +1075,25 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
976 * A lot of state is already set up in PDA init for 64 bit 1075 * A lot of state is already set up in PDA init for 64 bit
977 */ 1076 */
978#ifdef CONFIG_X86_64 1077#ifdef CONFIG_X86_64
1078
979void __cpuinit cpu_init(void) 1079void __cpuinit cpu_init(void)
980{ 1080{
981 int cpu = stack_smp_processor_id(); 1081 struct orig_ist *orig_ist;
982 struct tss_struct *t = &per_cpu(init_tss, cpu);
983 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
984 unsigned long v;
985 char *estacks = NULL;
986 struct task_struct *me; 1082 struct task_struct *me;
1083 struct tss_struct *t;
1084 unsigned long v;
1085 int cpu;
987 int i; 1086 int i;
988 1087
989 /* CPU 0 is initialised in head64.c */ 1088 cpu = stack_smp_processor_id();
990 if (cpu != 0) 1089 t = &per_cpu(init_tss, cpu);
991 pda_init(cpu); 1090 orig_ist = &per_cpu(orig_ist, cpu);
992 else 1091
993 estacks = boot_exception_stacks; 1092#ifdef CONFIG_NUMA
1093 if (cpu != 0 && percpu_read(node_number) == 0 &&
1094 cpu_to_node(cpu) != NUMA_NO_NODE)
1095 percpu_write(node_number, cpu_to_node(cpu));
1096#endif
994 1097
995 me = current; 1098 me = current;
996 1099
@@ -1006,7 +1109,9 @@ void __cpuinit cpu_init(void)
1006 * and set up the GDT descriptor: 1109 * and set up the GDT descriptor:
1007 */ 1110 */
1008 1111
1009 switch_to_new_gdt(); 1112 switch_to_new_gdt(cpu);
1113 loadsegment(fs, 0);
1114
1010 load_idt((const struct desc_ptr *)&idt_descr); 1115 load_idt((const struct desc_ptr *)&idt_descr);
1011 1116
1012 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); 1117 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
@@ -1017,31 +1122,24 @@ void __cpuinit cpu_init(void)
1017 barrier(); 1122 barrier();
1018 1123
1019 check_efer(); 1124 check_efer();
1020 if (cpu != 0 && x2apic) 1125 if (cpu != 0)
1021 enable_x2apic(); 1126 enable_x2apic();
1022 1127
1023 /* 1128 /*
1024 * set up and load the per-CPU TSS 1129 * set up and load the per-CPU TSS
1025 */ 1130 */
1026 if (!orig_ist->ist[0]) { 1131 if (!orig_ist->ist[0]) {
1027 static const unsigned int order[N_EXCEPTION_STACKS] = { 1132 char *estacks = per_cpu(exception_stacks, cpu);
1028 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, 1133
1029 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
1030 };
1031 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1134 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1032 if (cpu) { 1135 estacks += exception_stack_sizes[v];
1033 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
1034 if (!estacks)
1035 panic("Cannot allocate exception "
1036 "stack %ld %d\n", v, cpu);
1037 }
1038 estacks += PAGE_SIZE << order[v];
1039 orig_ist->ist[v] = t->x86_tss.ist[v] = 1136 orig_ist->ist[v] = t->x86_tss.ist[v] =
1040 (unsigned long)estacks; 1137 (unsigned long)estacks;
1041 } 1138 }
1042 } 1139 }
1043 1140
1044 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 1141 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
1142
1045 /* 1143 /*
1046 * <= is required because the CPU will access up to 1144 * <= is required because the CPU will access up to
1047 * 8 bits beyond the end of the IO permission bitmap. 1145 * 8 bits beyond the end of the IO permission bitmap.
@@ -1051,8 +1149,7 @@ void __cpuinit cpu_init(void)
1051 1149
1052 atomic_inc(&init_mm.mm_count); 1150 atomic_inc(&init_mm.mm_count);
1053 me->active_mm = &init_mm; 1151 me->active_mm = &init_mm;
1054 if (me->mm) 1152 BUG_ON(me->mm);
1055 BUG();
1056 enter_lazy_tlb(&init_mm, me); 1153 enter_lazy_tlb(&init_mm, me);
1057 1154
1058 load_sp0(t, &current->thread); 1155 load_sp0(t, &current->thread);
@@ -1069,22 +1166,9 @@ void __cpuinit cpu_init(void)
1069 */ 1166 */
1070 if (kgdb_connected && arch_kgdb_ops.correct_hw_break) 1167 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
1071 arch_kgdb_ops.correct_hw_break(); 1168 arch_kgdb_ops.correct_hw_break();
1072 else { 1169 else
1073#endif
1074 /*
1075 * Clear all 6 debug registers:
1076 */
1077
1078 set_debugreg(0UL, 0);
1079 set_debugreg(0UL, 1);
1080 set_debugreg(0UL, 2);
1081 set_debugreg(0UL, 3);
1082 set_debugreg(0UL, 6);
1083 set_debugreg(0UL, 7);
1084#ifdef CONFIG_KGDB
1085 /* If the kgdb is connected no debug regs should be altered. */
1086 }
1087#endif 1170#endif
1171 clear_all_debug_regs();
1088 1172
1089 fpu_init(); 1173 fpu_init();
1090 1174
@@ -1105,7 +1189,8 @@ void __cpuinit cpu_init(void)
1105 1189
1106 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { 1190 if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
1107 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); 1191 printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
1108 for (;;) local_irq_enable(); 1192 for (;;)
1193 local_irq_enable();
1109 } 1194 }
1110 1195
1111 printk(KERN_INFO "Initializing CPU#%d\n", cpu); 1196 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
@@ -1114,15 +1199,14 @@ void __cpuinit cpu_init(void)
1114 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1199 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1115 1200
1116 load_idt(&idt_descr); 1201 load_idt(&idt_descr);
1117 switch_to_new_gdt(); 1202 switch_to_new_gdt(cpu);
1118 1203
1119 /* 1204 /*
1120 * Set up and load the per-CPU TSS and LDT 1205 * Set up and load the per-CPU TSS and LDT
1121 */ 1206 */
1122 atomic_inc(&init_mm.mm_count); 1207 atomic_inc(&init_mm.mm_count);
1123 curr->active_mm = &init_mm; 1208 curr->active_mm = &init_mm;
1124 if (curr->mm) 1209 BUG_ON(curr->mm);
1125 BUG();
1126 enter_lazy_tlb(&init_mm, curr); 1210 enter_lazy_tlb(&init_mm, curr);
1127 1211
1128 load_sp0(t, thread); 1212 load_sp0(t, thread);
@@ -1135,16 +1219,7 @@ void __cpuinit cpu_init(void)
1135 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 1219 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
1136#endif 1220#endif
1137 1221
1138 /* Clear %gs. */ 1222 clear_all_debug_regs();
1139 asm volatile ("mov %0, %%gs" : : "r" (0));
1140
1141 /* Clear all 6 debug registers: */
1142 set_debugreg(0, 0);
1143 set_debugreg(0, 1);
1144 set_debugreg(0, 2);
1145 set_debugreg(0, 3);
1146 set_debugreg(0, 6);
1147 set_debugreg(0, 7);
1148 1223
1149 /* 1224 /*
1150 * Force FPU initialization: 1225 * Force FPU initialization:
@@ -1164,6 +1239,4 @@ void __cpuinit cpu_init(void)
1164 1239
1165 xsave_init(); 1240 xsave_init();
1166} 1241}
1167
1168
1169#endif 1242#endif
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index de4094a3921..9469ecb5aeb 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -5,15 +5,15 @@
5struct cpu_model_info { 5struct cpu_model_info {
6 int vendor; 6 int vendor;
7 int family; 7 int family;
8 char *model_names[16]; 8 const char *model_names[16];
9}; 9};
10 10
11/* attempt to consolidate cpu attributes */ 11/* attempt to consolidate cpu attributes */
12struct cpu_dev { 12struct cpu_dev {
13 char * c_vendor; 13 const char * c_vendor;
14 14
15 /* some have two possibilities for cpuid string */ 15 /* some have two possibilities for cpuid string */
16 char * c_ident[2]; 16 const char * c_ident[2];
17 17
18 struct cpu_model_info c_models[4]; 18 struct cpu_model_info c_models[4];
19 19
@@ -25,11 +25,12 @@ struct cpu_dev {
25}; 25};
26 26
27#define cpu_dev_register(cpu_devX) \ 27#define cpu_dev_register(cpu_devX) \
28 static struct cpu_dev *__cpu_dev_##cpu_devX __used \ 28 static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \
29 __attribute__((__section__(".x86_cpu_dev.init"))) = \ 29 __attribute__((__section__(".x86_cpu_dev.init"))) = \
30 &cpu_devX; 30 &cpu_devX;
31 31
32extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; 32extern const struct cpu_dev *const __x86_cpu_dev_start[],
33 *const __x86_cpu_dev_end[];
33 34
34extern void display_cacheinfo(struct cpuinfo_x86 *c); 35extern void display_cacheinfo(struct cpuinfo_x86 *c);
35 36
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
new file mode 100755
index 00000000000..21c0cf8ced1
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -0,0 +1,839 @@
1/*
2 * CPU x86 architecture debug code
3 *
4 * Copyright(C) 2009 Jaswinder Singh Rajput
5 *
6 * For licencing details see kernel-base/COPYING
7 */
8
9#include <linux/interrupt.h>
10#include <linux/compiler.h>
11#include <linux/seq_file.h>
12#include <linux/debugfs.h>
13#include <linux/kprobes.h>
14#include <linux/uaccess.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/percpu.h>
18#include <linux/signal.h>
19#include <linux/errno.h>
20#include <linux/sched.h>
21#include <linux/types.h>
22#include <linux/init.h>
23#include <linux/slab.h>
24#include <linux/smp.h>
25
26#include <asm/cpu_debug.h>
27#include <asm/paravirt.h>
28#include <asm/system.h>
29#include <asm/traps.h>
30#include <asm/apic.h>
31#include <asm/desc.h>
32
33static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
34static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
35static DEFINE_PER_CPU(unsigned, cpu_modelflag);
36static DEFINE_PER_CPU(int, cpu_priv_count);
37static DEFINE_PER_CPU(unsigned, cpu_model);
38
39static DEFINE_MUTEX(cpu_debug_lock);
40
41static struct dentry *cpu_debugfs_dir;
42
43static struct cpu_debug_base cpu_base[] = {
44 { "mc", CPU_MC, 0 },
45 { "monitor", CPU_MONITOR, 0 },
46 { "time", CPU_TIME, 0 },
47 { "pmc", CPU_PMC, 1 },
48 { "platform", CPU_PLATFORM, 0 },
49 { "apic", CPU_APIC, 0 },
50 { "poweron", CPU_POWERON, 0 },
51 { "control", CPU_CONTROL, 0 },
52 { "features", CPU_FEATURES, 0 },
53 { "lastbranch", CPU_LBRANCH, 0 },
54 { "bios", CPU_BIOS, 0 },
55 { "freq", CPU_FREQ, 0 },
56 { "mtrr", CPU_MTRR, 0 },
57 { "perf", CPU_PERF, 0 },
58 { "cache", CPU_CACHE, 0 },
59 { "sysenter", CPU_SYSENTER, 0 },
60 { "therm", CPU_THERM, 0 },
61 { "misc", CPU_MISC, 0 },
62 { "debug", CPU_DEBUG, 0 },
63 { "pat", CPU_PAT, 0 },
64 { "vmx", CPU_VMX, 0 },
65 { "call", CPU_CALL, 0 },
66 { "base", CPU_BASE, 0 },
67 { "smm", CPU_SMM, 0 },
68 { "svm", CPU_SVM, 0 },
69 { "osvm", CPU_OSVM, 0 },
70 { "tss", CPU_TSS, 0 },
71 { "cr", CPU_CR, 0 },
72 { "dt", CPU_DT, 0 },
73 { "registers", CPU_REG_ALL, 0 },
74};
75
76static struct cpu_file_base cpu_file[] = {
77 { "index", CPU_REG_ALL, 0 },
78 { "value", CPU_REG_ALL, 1 },
79};
80
81/* Intel Registers Range */
82static struct cpu_debug_range cpu_intel_range[] = {
83 { 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL },
84 { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE },
85 { 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL },
86 { 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM },
87 { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE },
88 { 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE },
89
90 { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE },
91 { 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON },
92 { 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON },
93 { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE },
94
95 { 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE },
96 { 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT },
97 { 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT },
98 { 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM },
99
100 { 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE },
101 { 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 },
102 { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE },
103 { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON },
104
105 { 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT },
106 { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT },
107 { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT },
108 { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE },
109
110 { 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 },
111 { 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 },
112 { 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX },
113 { 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 },
114 { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT },
115
116 { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE },
117 { 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE },
118 { 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE },
119 { 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT },
120 { 0x00000198, 0x00000199, CPU_PERF, CPU_PM_CX_AT_XE },
121 { 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE },
122 { 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE },
123 { 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE },
124
125 { 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT },
126 { 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON },
127 { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE },
128 { 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON },
129 { 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE },
130 { 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 },
131 { 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE },
132 { 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 },
133
134 { 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE },
135 { 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE },
136 { 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE },
137 { 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE },
138 { 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE },
139 { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE },
140
141 { 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON },
142 { 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE },
143 { 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON },
144 { 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT },
145 { 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON },
146 { 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT },
147 { 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON },
148 { 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON },
149 { 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON },
150 { 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON },
151 { 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE },
152 { 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON },
153
154 { 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE },
155 { 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON },
156 { 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE },
157 { 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON },
158 { 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE },
159 { 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON },
160 { 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE },
161 { 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON },
162 { 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE },
163 { 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE },
164 { 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE },
165
166 { 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE },
167 { 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON },
168 { 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON },
169
170 { 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP },
171
172 { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON },
173 { 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON },
174 { 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON },
175 { 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON },
176};
177
178/* AMD Registers Range */
179static struct cpu_debug_range cpu_amd_range[] = {
180 { 0x00000010, 0x00000010, CPU_TIME, CPU_ALL, },
181 { 0x0000001B, 0x0000001B, CPU_APIC, CPU_ALL, },
182 { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_ALL, },
183
184 { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_ALL, },
185 { 0x00000179, 0x0000017A, CPU_MC, CPU_ALL, },
186 { 0x0000017B, 0x0000017B, CPU_MC, CPU_ALL, },
187 { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_ALL, },
188 { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_ALL, },
189
190 { 0x00000200, 0x0000020F, CPU_MTRR, CPU_ALL, },
191 { 0x00000250, 0x00000250, CPU_MTRR, CPU_ALL, },
192 { 0x00000258, 0x00000259, CPU_MTRR, CPU_ALL, },
193 { 0x00000268, 0x0000026F, CPU_MTRR, CPU_ALL, },
194 { 0x00000277, 0x00000277, CPU_PAT, CPU_ALL, },
195 { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_ALL, },
196
197 { 0x00000400, 0x00000417, CPU_MC, CPU_ALL, },
198
199 { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_ALL, },
200 { 0xC0000081, 0xC0000084, CPU_CALL, CPU_ALL, },
201 { 0xC0000100, 0xC0000102, CPU_BASE, CPU_ALL, },
202 { 0xC0000103, 0xC0000103, CPU_TIME, CPU_ALL, },
203
204 { 0xC0000408, 0xC000040A, CPU_MC, CPU_ALL, },
205
206 { 0xc0010000, 0xc0010007, CPU_PMC, CPU_ALL, },
207 { 0xc0010010, 0xc0010010, CPU_MTRR, CPU_ALL, },
208 { 0xc0010016, 0xc001001A, CPU_MTRR, CPU_ALL, },
209 { 0xc001001D, 0xc001001D, CPU_MTRR, CPU_ALL, },
210 { 0xc0010030, 0xc0010035, CPU_BIOS, CPU_ALL, },
211 { 0xc0010056, 0xc0010056, CPU_SMM, CPU_ALL, },
212 { 0xc0010061, 0xc0010063, CPU_SMM, CPU_ALL, },
213 { 0xc0010074, 0xc0010074, CPU_MC, CPU_ALL, },
214 { 0xc0010111, 0xc0010113, CPU_SMM, CPU_ALL, },
215 { 0xc0010114, 0xc0010118, CPU_SVM, CPU_ALL, },
216 { 0xc0010119, 0xc001011A, CPU_SMM, CPU_ALL, },
217 { 0xc0010140, 0xc0010141, CPU_OSVM, CPU_ALL, },
218 { 0xc0010156, 0xc0010156, CPU_SMM, CPU_ALL, },
219};
220
221
222static int get_cpu_modelflag(unsigned cpu)
223{
224 int flag;
225
226 switch (per_cpu(cpu_model, cpu)) {
227 /* Intel */
228 case 0x0501:
229 case 0x0502:
230 case 0x0504:
231 flag = CPU_INTEL_PENTIUM;
232 break;
233 case 0x0601:
234 case 0x0603:
235 case 0x0605:
236 case 0x0607:
237 case 0x0608:
238 case 0x060A:
239 case 0x060B:
240 flag = CPU_INTEL_P6;
241 break;
242 case 0x0609:
243 case 0x060D:
244 flag = CPU_INTEL_PENTIUM_M;
245 break;
246 case 0x060E:
247 flag = CPU_INTEL_CORE;
248 break;
249 case 0x060F:
250 case 0x0617:
251 flag = CPU_INTEL_CORE2;
252 break;
253 case 0x061C:
254 flag = CPU_INTEL_ATOM;
255 break;
256 case 0x0F00:
257 case 0x0F01:
258 case 0x0F02:
259 case 0x0F03:
260 case 0x0F04:
261 flag = CPU_INTEL_XEON_P4;
262 break;
263 case 0x0F06:
264 flag = CPU_INTEL_XEON_MP;
265 break;
266 default:
267 flag = CPU_NONE;
268 break;
269 }
270
271 return flag;
272}
273
274static int get_cpu_range_count(unsigned cpu)
275{
276 int index;
277
278 switch (per_cpu(cpu_model, cpu) >> 16) {
279 case X86_VENDOR_INTEL:
280 index = ARRAY_SIZE(cpu_intel_range);
281 break;
282 case X86_VENDOR_AMD:
283 index = ARRAY_SIZE(cpu_amd_range);
284 break;
285 default:
286 index = 0;
287 break;
288 }
289
290 return index;
291}
292
293static int is_typeflag_valid(unsigned cpu, unsigned flag)
294{
295 unsigned vendor, modelflag;
296 int i, index;
297
298 /* Standard Registers should be always valid */
299 if (flag >= CPU_TSS)
300 return 1;
301
302 modelflag = per_cpu(cpu_modelflag, cpu);
303 vendor = per_cpu(cpu_model, cpu) >> 16;
304 index = get_cpu_range_count(cpu);
305
306 for (i = 0; i < index; i++) {
307 switch (vendor) {
308 case X86_VENDOR_INTEL:
309 if ((cpu_intel_range[i].model & modelflag) &&
310 (cpu_intel_range[i].flag & flag))
311 return 1;
312 break;
313 case X86_VENDOR_AMD:
314 if (cpu_amd_range[i].flag & flag)
315 return 1;
316 break;
317 }
318 }
319
320 /* Invalid */
321 return 0;
322}
323
324static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
325 int index, unsigned flag)
326{
327 unsigned modelflag;
328
329 modelflag = per_cpu(cpu_modelflag, cpu);
330 *max = 0;
331 switch (per_cpu(cpu_model, cpu) >> 16) {
332 case X86_VENDOR_INTEL:
333 if ((cpu_intel_range[index].model & modelflag) &&
334 (cpu_intel_range[index].flag & flag)) {
335 *min = cpu_intel_range[index].min;
336 *max = cpu_intel_range[index].max;
337 }
338 break;
339 case X86_VENDOR_AMD:
340 if (cpu_amd_range[index].flag & flag) {
341 *min = cpu_amd_range[index].min;
342 *max = cpu_amd_range[index].max;
343 }
344 break;
345 }
346
347 return *max;
348}
349
350/* This function can also be called with seq = NULL for printk */
351static void print_cpu_data(struct seq_file *seq, unsigned type,
352 u32 low, u32 high)
353{
354 struct cpu_private *priv;
355 u64 val = high;
356
357 if (seq) {
358 priv = seq->private;
359 if (priv->file) {
360 val = (val << 32) | low;
361 seq_printf(seq, "0x%llx\n", val);
362 } else
363 seq_printf(seq, " %08x: %08x_%08x\n",
364 type, high, low);
365 } else
366 printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low);
367}
368
369/* This function can also be called with seq = NULL for printk */
370static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
371{
372 unsigned msr, msr_min, msr_max;
373 struct cpu_private *priv;
374 u32 low, high;
375 int i, range;
376
377 if (seq) {
378 priv = seq->private;
379 if (priv->file) {
380 if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg,
381 &low, &high))
382 print_cpu_data(seq, priv->reg, low, high);
383 return;
384 }
385 }
386
387 range = get_cpu_range_count(cpu);
388
389 for (i = 0; i < range; i++) {
390 if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
391 continue;
392
393 for (msr = msr_min; msr <= msr_max; msr++) {
394 if (rdmsr_safe_on_cpu(cpu, msr, &low, &high))
395 continue;
396 print_cpu_data(seq, msr, low, high);
397 }
398 }
399}
400
401static void print_tss(void *arg)
402{
403 struct pt_regs *regs = task_pt_regs(current);
404 struct seq_file *seq = arg;
405 unsigned int seg;
406
407 seq_printf(seq, " RAX\t: %016lx\n", regs->ax);
408 seq_printf(seq, " RBX\t: %016lx\n", regs->bx);
409 seq_printf(seq, " RCX\t: %016lx\n", regs->cx);
410 seq_printf(seq, " RDX\t: %016lx\n", regs->dx);
411
412 seq_printf(seq, " RSI\t: %016lx\n", regs->si);
413 seq_printf(seq, " RDI\t: %016lx\n", regs->di);
414 seq_printf(seq, " RBP\t: %016lx\n", regs->bp);
415 seq_printf(seq, " ESP\t: %016lx\n", regs->sp);
416
417#ifdef CONFIG_X86_64
418 seq_printf(seq, " R08\t: %016lx\n", regs->r8);
419 seq_printf(seq, " R09\t: %016lx\n", regs->r9);
420 seq_printf(seq, " R10\t: %016lx\n", regs->r10);
421 seq_printf(seq, " R11\t: %016lx\n", regs->r11);
422 seq_printf(seq, " R12\t: %016lx\n", regs->r12);
423 seq_printf(seq, " R13\t: %016lx\n", regs->r13);
424 seq_printf(seq, " R14\t: %016lx\n", regs->r14);
425 seq_printf(seq, " R15\t: %016lx\n", regs->r15);
426#endif
427
428 asm("movl %%cs,%0" : "=r" (seg));
429 seq_printf(seq, " CS\t: %04x\n", seg);
430 asm("movl %%ds,%0" : "=r" (seg));
431 seq_printf(seq, " DS\t: %04x\n", seg);
432 seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff);
433 asm("movl %%es,%0" : "=r" (seg));
434 seq_printf(seq, " ES\t: %04x\n", seg);
435 asm("movl %%fs,%0" : "=r" (seg));
436 seq_printf(seq, " FS\t: %04x\n", seg);
437 asm("movl %%gs,%0" : "=r" (seg));
438 seq_printf(seq, " GS\t: %04x\n", seg);
439
440 seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags);
441
442 seq_printf(seq, " EIP\t: %016lx\n", regs->ip);
443}
444
445static void print_cr(void *arg)
446{
447 struct seq_file *seq = arg;
448
449 seq_printf(seq, " cr0\t: %016lx\n", read_cr0());
450 seq_printf(seq, " cr2\t: %016lx\n", read_cr2());
451 seq_printf(seq, " cr3\t: %016lx\n", read_cr3());
452 seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe());
453#ifdef CONFIG_X86_64
454 seq_printf(seq, " cr8\t: %016lx\n", read_cr8());
455#endif
456}
457
458static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt)
459{
460 seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size));
461}
462
463static void print_dt(void *seq)
464{
465 struct desc_ptr dt;
466 unsigned long ldt;
467
468 /* IDT */
469 store_idt((struct desc_ptr *)&dt);
470 print_desc_ptr("IDT", seq, dt);
471
472 /* GDT */
473 store_gdt((struct desc_ptr *)&dt);
474 print_desc_ptr("GDT", seq, dt);
475
476 /* LDT */
477 store_ldt(ldt);
478 seq_printf(seq, " LDT\t: %016lx\n", ldt);
479
480 /* TR */
481 store_tr(ldt);
482 seq_printf(seq, " TR\t: %016lx\n", ldt);
483}
484
485static void print_dr(void *arg)
486{
487 struct seq_file *seq = arg;
488 unsigned long dr;
489 int i;
490
491 for (i = 0; i < 8; i++) {
492 /* Ignore db4, db5 */
493 if ((i == 4) || (i == 5))
494 continue;
495 get_debugreg(dr, i);
496 seq_printf(seq, " dr%d\t: %016lx\n", i, dr);
497 }
498
499 seq_printf(seq, "\n MSR\t:\n");
500}
501
502static void print_apic(void *arg)
503{
504 struct seq_file *seq = arg;
505
506#ifdef CONFIG_X86_LOCAL_APIC
507 seq_printf(seq, " LAPIC\t:\n");
508 seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24);
509 seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR));
510 seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI));
511 seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI));
512 seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI));
513 seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR));
514 seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR));
515 seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV));
516 seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR));
517 seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR));
518 seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR));
519 seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2));
520 seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT));
521 seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR));
522 seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC));
523 seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0));
524 seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1));
525 seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR));
526 seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
527 seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
528 seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
529#endif /* CONFIG_X86_LOCAL_APIC */
530
531 seq_printf(seq, "\n MSR\t:\n");
532}
533
534static int cpu_seq_show(struct seq_file *seq, void *v)
535{
536 struct cpu_private *priv = seq->private;
537
538 if (priv == NULL)
539 return -EINVAL;
540
541 switch (cpu_base[priv->type].flag) {
542 case CPU_TSS:
543 smp_call_function_single(priv->cpu, print_tss, seq, 1);
544 break;
545 case CPU_CR:
546 smp_call_function_single(priv->cpu, print_cr, seq, 1);
547 break;
548 case CPU_DT:
549 smp_call_function_single(priv->cpu, print_dt, seq, 1);
550 break;
551 case CPU_DEBUG:
552 if (priv->file == CPU_INDEX_BIT)
553 smp_call_function_single(priv->cpu, print_dr, seq, 1);
554 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
555 break;
556 case CPU_APIC:
557 if (priv->file == CPU_INDEX_BIT)
558 smp_call_function_single(priv->cpu, print_apic, seq, 1);
559 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
560 break;
561
562 default:
563 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
564 break;
565 }
566 seq_printf(seq, "\n");
567
568 return 0;
569}
570
571static void *cpu_seq_start(struct seq_file *seq, loff_t *pos)
572{
573 if (*pos == 0) /* One time is enough ;-) */
574 return seq;
575
576 return NULL;
577}
578
579static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
580{
581 (*pos)++;
582
583 return cpu_seq_start(seq, pos);
584}
585
586static void cpu_seq_stop(struct seq_file *seq, void *v)
587{
588}
589
590static const struct seq_operations cpu_seq_ops = {
591 .start = cpu_seq_start,
592 .next = cpu_seq_next,
593 .stop = cpu_seq_stop,
594 .show = cpu_seq_show,
595};
596
597static int cpu_seq_open(struct inode *inode, struct file *file)
598{
599 struct cpu_private *priv = inode->i_private;
600 struct seq_file *seq;
601 int err;
602
603 err = seq_open(file, &cpu_seq_ops);
604 if (!err) {
605 seq = file->private_data;
606 seq->private = priv;
607 }
608
609 return err;
610}
611
612static int write_msr(struct cpu_private *priv, u64 val)
613{
614 u32 low, high;
615
616 high = (val >> 32) & 0xffffffff;
617 low = val & 0xffffffff;
618
619 if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high))
620 return 0;
621
622 return -EPERM;
623}
624
625static int write_cpu_register(struct cpu_private *priv, const char *buf)
626{
627 int ret = -EPERM;
628 u64 val;
629
630 ret = strict_strtoull(buf, 0, &val);
631 if (ret < 0)
632 return ret;
633
634 /* Supporting only MSRs */
635 if (priv->type < CPU_TSS_BIT)
636 return write_msr(priv, val);
637
638 return ret;
639}
640
641static ssize_t cpu_write(struct file *file, const char __user *ubuf,
642 size_t count, loff_t *off)
643{
644 struct seq_file *seq = file->private_data;
645 struct cpu_private *priv = seq->private;
646 char buf[19];
647
648 if ((priv == NULL) || (count >= sizeof(buf)))
649 return -EINVAL;
650
651 if (copy_from_user(&buf, ubuf, count))
652 return -EFAULT;
653
654 buf[count] = 0;
655
656 if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write))
657 if (!write_cpu_register(priv, buf))
658 return count;
659
660 return -EACCES;
661}
662
663static const struct file_operations cpu_fops = {
664 .owner = THIS_MODULE,
665 .open = cpu_seq_open,
666 .read = seq_read,
667 .write = cpu_write,
668 .llseek = seq_lseek,
669 .release = seq_release,
670};
671
672static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
673 unsigned file, struct dentry *dentry)
674{
675 struct cpu_private *priv = NULL;
676
677 /* Already intialized */
678 if (file == CPU_INDEX_BIT)
679 if (per_cpu(cpu_arr[type].init, cpu))
680 return 0;
681
682 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
683 if (priv == NULL)
684 return -ENOMEM;
685
686 priv->cpu = cpu;
687 priv->type = type;
688 priv->reg = reg;
689 priv->file = file;
690 mutex_lock(&cpu_debug_lock);
691 per_cpu(priv_arr[type], cpu) = priv;
692 per_cpu(cpu_priv_count, cpu)++;
693 mutex_unlock(&cpu_debug_lock);
694
695 if (file)
696 debugfs_create_file(cpu_file[file].name, S_IRUGO,
697 dentry, (void *)priv, &cpu_fops);
698 else {
699 debugfs_create_file(cpu_base[type].name, S_IRUGO,
700 per_cpu(cpu_arr[type].dentry, cpu),
701 (void *)priv, &cpu_fops);
702 mutex_lock(&cpu_debug_lock);
703 per_cpu(cpu_arr[type].init, cpu) = 1;
704 mutex_unlock(&cpu_debug_lock);
705 }
706
707 return 0;
708}
709
710static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg,
711 struct dentry *dentry)
712{
713 unsigned file;
714 int err = 0;
715
716 for (file = 0; file < ARRAY_SIZE(cpu_file); file++) {
717 err = cpu_create_file(cpu, type, reg, file, dentry);
718 if (err)
719 return err;
720 }
721
722 return err;
723}
724
725static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
726{
727 struct dentry *cpu_dentry = NULL;
728 unsigned reg, reg_min, reg_max;
729 int i, range, err = 0;
730 char reg_dir[12];
731 u32 low, high;
732
733 range = get_cpu_range_count(cpu);
734
735 for (i = 0; i < range; i++) {
736 if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
737 cpu_base[type].flag))
738 continue;
739
740 for (reg = reg_min; reg <= reg_max; reg++) {
741 if (rdmsr_safe_on_cpu(cpu, reg, &low, &high))
742 continue;
743
744 sprintf(reg_dir, "0x%x", reg);
745 cpu_dentry = debugfs_create_dir(reg_dir, dentry);
746 err = cpu_init_regfiles(cpu, type, reg, cpu_dentry);
747 if (err)
748 return err;
749 }
750 }
751
752 return err;
753}
754
755static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
756{
757 struct dentry *cpu_dentry = NULL;
758 unsigned type;
759 int err = 0;
760
761 for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) {
762 if (!is_typeflag_valid(cpu, cpu_base[type].flag))
763 continue;
764 cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
765 per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry;
766
767 if (type < CPU_TSS_BIT)
768 err = cpu_init_msr(cpu, type, cpu_dentry);
769 else
770 err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT,
771 cpu_dentry);
772 if (err)
773 return err;
774 }
775
776 return err;
777}
778
779static int cpu_init_cpu(void)
780{
781 struct dentry *cpu_dentry = NULL;
782 struct cpuinfo_x86 *cpui;
783 char cpu_dir[12];
784 unsigned cpu;
785 int err = 0;
786
787 for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
788 cpui = &cpu_data(cpu);
789 if (!cpu_has(cpui, X86_FEATURE_MSR))
790 continue;
791 per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) |
792 (cpui->x86 << 8) |
793 (cpui->x86_model));
794 per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu);
795
796 sprintf(cpu_dir, "cpu%d", cpu);
797 cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
798 err = cpu_init_allreg(cpu, cpu_dentry);
799
800 pr_info("cpu%d(%d) debug files %d\n",
801 cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu));
802 if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) {
803 pr_err("Register files count %d exceeds limit %d\n",
804 per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES);
805 per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES;
806 err = -ENFILE;
807 }
808 if (err)
809 return err;
810 }
811
812 return err;
813}
814
815static int __init cpu_debug_init(void)
816{
817 cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir);
818
819 return cpu_init_cpu();
820}
821
822static void __exit cpu_debug_exit(void)
823{
824 int i, cpu;
825
826 if (cpu_debugfs_dir)
827 debugfs_remove_recursive(cpu_debugfs_dir);
828
829 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
830 for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++)
831 kfree(per_cpu(priv_arr[i], cpu));
832}
833
834module_init(cpu_debug_init);
835module_exit(cpu_debug_exit);
836
837MODULE_AUTHOR("Jaswinder Singh Rajput");
838MODULE_DESCRIPTION("CPU Debug module");
839MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 4b1c319d30c..22590cf688a 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -601,7 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
601 if (!data) 601 if (!data)
602 return -ENOMEM; 602 return -ENOMEM;
603 603
604 data->acpi_data = percpu_ptr(acpi_perf_data, cpu); 604 data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
605 per_cpu(drv_data, cpu) = data; 605 per_cpu(drv_data, cpu) = data;
606 606
607 if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) 607 if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index c2f930d8664..41ab3f064cb 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -204,12 +204,12 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
204 } 204 }
205 /* Enable Enhanced PowerSaver */ 205 /* Enable Enhanced PowerSaver */
206 rdmsrl(MSR_IA32_MISC_ENABLE, val); 206 rdmsrl(MSR_IA32_MISC_ENABLE, val);
207 if (!(val & 1 << 16)) { 207 if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
208 val |= 1 << 16; 208 val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
209 wrmsrl(MSR_IA32_MISC_ENABLE, val); 209 wrmsrl(MSR_IA32_MISC_ENABLE, val);
210 /* Can be locked at 0 */ 210 /* Can be locked at 0 */
211 rdmsrl(MSR_IA32_MISC_ENABLE, val); 211 rdmsrl(MSR_IA32_MISC_ENABLE, val);
212 if (!(val & 1 << 16)) { 212 if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
213 printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n"); 213 printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
214 return -ENODEV; 214 return -ENODEV;
215 } 215 }
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index f08998278a3..c9f1fdc0283 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -390,14 +390,14 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
390 enable it if not. */ 390 enable it if not. */
391 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 391 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
392 392
393 if (!(l & (1<<16))) { 393 if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
394 l |= (1<<16); 394 l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
395 dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); 395 dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
396 wrmsr(MSR_IA32_MISC_ENABLE, l, h); 396 wrmsr(MSR_IA32_MISC_ENABLE, l, h);
397 397
398 /* check to see if it stuck */ 398 /* check to see if it stuck */
399 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 399 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
400 if (!(l & (1<<16))) { 400 if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
401 printk(KERN_INFO PFX 401 printk(KERN_INFO PFX
402 "couldn't enable Enhanced SpeedStep\n"); 402 "couldn't enable Enhanced SpeedStep\n");
403 return -ENODEV; 403 return -ENODEV;
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index ffd0f5ed071..593171e967e 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -61,23 +61,23 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
61 */ 61 */
62static unsigned char Cx86_dir0_msb __cpuinitdata = 0; 62static unsigned char Cx86_dir0_msb __cpuinitdata = 0;
63 63
64static char Cx86_model[][9] __cpuinitdata = { 64static const char __cpuinitconst Cx86_model[][9] = {
65 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", 65 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
66 "M II ", "Unknown" 66 "M II ", "Unknown"
67}; 67};
68static char Cx486_name[][5] __cpuinitdata = { 68static const char __cpuinitconst Cx486_name[][5] = {
69 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", 69 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
70 "SRx2", "DRx2" 70 "SRx2", "DRx2"
71}; 71};
72static char Cx486S_name[][4] __cpuinitdata = { 72static const char __cpuinitconst Cx486S_name[][4] = {
73 "S", "S2", "Se", "S2e" 73 "S", "S2", "Se", "S2e"
74}; 74};
75static char Cx486D_name[][4] __cpuinitdata = { 75static const char __cpuinitconst Cx486D_name[][4] = {
76 "DX", "DX2", "?", "?", "?", "DX4" 76 "DX", "DX2", "?", "?", "?", "DX4"
77}; 77};
78static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; 78static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock";
79static char cyrix_model_mult1[] __cpuinitdata = "12??43"; 79static const char __cpuinitconst cyrix_model_mult1[] = "12??43";
80static char cyrix_model_mult2[] __cpuinitdata = "12233445"; 80static const char __cpuinitconst cyrix_model_mult2[] = "12233445";
81 81
82/* 82/*
83 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old 83 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
@@ -435,7 +435,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
435 } 435 }
436} 436}
437 437
438static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { 438static const struct cpu_dev __cpuinitconst cyrix_cpu_dev = {
439 .c_vendor = "Cyrix", 439 .c_vendor = "Cyrix",
440 .c_ident = { "CyrixInstead" }, 440 .c_ident = { "CyrixInstead" },
441 .c_early_init = early_init_cyrix, 441 .c_early_init = early_init_cyrix,
@@ -446,7 +446,7 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
446 446
447cpu_dev_register(cyrix_cpu_dev); 447cpu_dev_register(cyrix_cpu_dev);
448 448
449static struct cpu_dev nsc_cpu_dev __cpuinitdata = { 449static const struct cpu_dev __cpuinitconst nsc_cpu_dev = {
450 .c_vendor = "NSC", 450 .c_vendor = "NSC",
451 .c_ident = { "Geode by NSC" }, 451 .c_ident = { "Geode by NSC" },
452 .c_init = init_nsc, 452 .c_init = init_nsc,
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 24ff26a38ad..b09d4eb52bb 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -13,6 +13,7 @@
13#include <asm/uaccess.h> 13#include <asm/uaccess.h>
14#include <asm/ds.h> 14#include <asm/ds.h>
15#include <asm/bugs.h> 15#include <asm/bugs.h>
16#include <asm/cpu.h>
16 17
17#ifdef CONFIG_X86_64 18#ifdef CONFIG_X86_64
18#include <asm/topology.h> 19#include <asm/topology.h>
@@ -24,7 +25,6 @@
24#ifdef CONFIG_X86_LOCAL_APIC 25#ifdef CONFIG_X86_LOCAL_APIC
25#include <asm/mpspec.h> 26#include <asm/mpspec.h>
26#include <asm/apic.h> 27#include <asm/apic.h>
27#include <mach_apic.h>
28#endif 28#endif
29 29
30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) 30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
@@ -54,6 +54,11 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
54 c->x86_cache_alignment = 128; 54 c->x86_cache_alignment = 128;
55#endif 55#endif
56 56
57 /* CPUID workaround for 0F33/0F34 CPU */
58 if (c->x86 == 0xF && c->x86_model == 0x3
59 && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
60 c->x86_phys_bits = 36;
61
57 /* 62 /*
58 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate 63 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
59 * with P/T states and does not stop in deep C-states 64 * with P/T states and does not stop in deep C-states
@@ -63,6 +68,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
63 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 68 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
64 } 69 }
65 70
71 /*
72 * There is a known erratum on Pentium III and Core Solo
73 * and Core Duo CPUs.
74 * " Page with PAT set to WC while associated MTRR is UC
75 * may consolidate to UC "
76 * Because of this erratum, it is better to stick with
77 * setting WC in MTRR rather than using PAT on these CPUs.
78 *
79 * Enable PAT WC only on P4, Core 2 or later CPUs.
80 */
81 if (c->x86 == 6 && c->x86_model < 15)
82 clear_cpu_cap(c, X86_FEATURE_PAT);
66} 83}
67 84
68#ifdef CONFIG_X86_32 85#ifdef CONFIG_X86_32
@@ -99,6 +116,28 @@ static void __cpuinit trap_init_f00f_bug(void)
99} 116}
100#endif 117#endif
101 118
119static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
120{
121#ifdef CONFIG_SMP
122 /* calling is from identify_secondary_cpu() ? */
123 if (c->cpu_index == boot_cpu_id)
124 return;
125
126 /*
127 * Mask B, Pentium, but not Pentium MMX
128 */
129 if (c->x86 == 5 &&
130 c->x86_mask >= 1 && c->x86_mask <= 4 &&
131 c->x86_model <= 3) {
132 /*
133 * Remember we have B step Pentia with bugs
134 */
135 WARN_ONCE(1, "WARNING: SMP operation may be unreliable"
136 "with B stepping processors.\n");
137 }
138#endif
139}
140
102static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) 141static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
103{ 142{
104 unsigned long lo, hi; 143 unsigned long lo, hi;
@@ -135,10 +174,10 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
135 */ 174 */
136 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { 175 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
137 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); 176 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
138 if ((lo & (1<<9)) == 0) { 177 if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) {
139 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); 178 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
140 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); 179 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
141 lo |= (1<<9); /* Disable hw prefetching */ 180 lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
142 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); 181 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
143 } 182 }
144 } 183 }
@@ -175,6 +214,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
175#ifdef CONFIG_X86_NUMAQ 214#ifdef CONFIG_X86_NUMAQ
176 numaq_tsc_disable(); 215 numaq_tsc_disable();
177#endif 216#endif
217
218 intel_smp_check(c);
178} 219}
179#else 220#else
180static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) 221static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
@@ -374,7 +415,7 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
374} 415}
375#endif 416#endif
376 417
377static struct cpu_dev intel_cpu_dev __cpuinitdata = { 418static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
378 .c_vendor = "Intel", 419 .c_vendor = "Intel",
379 .c_ident = { "GenuineIntel" }, 420 .c_ident = { "GenuineIntel" },
380#ifdef CONFIG_X86_32 421#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index da299eb85fc..c471eb1a389 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -32,7 +32,7 @@ struct _cache_table
32}; 32};
33 33
34/* all the cache descriptor types we care about (no TLB or trace cache entries) */ 34/* all the cache descriptor types we care about (no TLB or trace cache entries) */
35static struct _cache_table cache_table[] __cpuinitdata = 35static const struct _cache_table __cpuinitconst cache_table[] =
36{ 36{
37 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ 37 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
38 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ 38 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
@@ -147,7 +147,16 @@ struct _cpuid4_info {
147 union _cpuid4_leaf_ecx ecx; 147 union _cpuid4_leaf_ecx ecx;
148 unsigned long size; 148 unsigned long size;
149 unsigned long can_disable; 149 unsigned long can_disable;
150 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ 150 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
151};
152
153/* subset of above _cpuid4_info w/o shared_cpu_map */
154struct _cpuid4_info_regs {
155 union _cpuid4_leaf_eax eax;
156 union _cpuid4_leaf_ebx ebx;
157 union _cpuid4_leaf_ecx ecx;
158 unsigned long size;
159 unsigned long can_disable;
151}; 160};
152 161
153#ifdef CONFIG_PCI 162#ifdef CONFIG_PCI
@@ -197,15 +206,15 @@ union l3_cache {
197 unsigned val; 206 unsigned val;
198}; 207};
199 208
200static unsigned short assocs[] __cpuinitdata = { 209static const unsigned short __cpuinitconst assocs[] = {
201 [1] = 1, [2] = 2, [4] = 4, [6] = 8, 210 [1] = 1, [2] = 2, [4] = 4, [6] = 8,
202 [8] = 16, [0xa] = 32, [0xb] = 48, 211 [8] = 16, [0xa] = 32, [0xb] = 48,
203 [0xc] = 64, 212 [0xc] = 64,
204 [0xf] = 0xffff // ?? 213 [0xf] = 0xffff // ??
205}; 214};
206 215
207static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; 216static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
208static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; 217static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
209 218
210static void __cpuinit 219static void __cpuinit
211amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, 220amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
@@ -278,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
278} 287}
279 288
280static void __cpuinit 289static void __cpuinit
281amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) 290amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
282{ 291{
283 if (index < 3) 292 if (index < 3)
284 return; 293 return;
@@ -286,7 +295,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
286} 295}
287 296
288static int 297static int
289__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 298__cpuinit cpuid4_cache_lookup_regs(int index,
299 struct _cpuid4_info_regs *this_leaf)
290{ 300{
291 union _cpuid4_leaf_eax eax; 301 union _cpuid4_leaf_eax eax;
292 union _cpuid4_leaf_ebx ebx; 302 union _cpuid4_leaf_ebx ebx;
@@ -314,6 +324,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
314 return 0; 324 return 0;
315} 325}
316 326
327static int
328__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
329{
330 struct _cpuid4_info_regs *leaf_regs =
331 (struct _cpuid4_info_regs *)this_leaf;
332
333 return cpuid4_cache_lookup_regs(index, leaf_regs);
334}
335
317static int __cpuinit find_num_cache_leaves(void) 336static int __cpuinit find_num_cache_leaves(void)
318{ 337{
319 unsigned int eax, ebx, ecx, edx; 338 unsigned int eax, ebx, ecx, edx;
@@ -353,11 +372,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
353 * parameters cpuid leaf to find the cache details 372 * parameters cpuid leaf to find the cache details
354 */ 373 */
355 for (i = 0; i < num_cache_leaves; i++) { 374 for (i = 0; i < num_cache_leaves; i++) {
356 struct _cpuid4_info this_leaf; 375 struct _cpuid4_info_regs this_leaf;
357
358 int retval; 376 int retval;
359 377
360 retval = cpuid4_cache_lookup(i, &this_leaf); 378 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
361 if (retval >= 0) { 379 if (retval >= 0) {
362 switch(this_leaf.eax.split.level) { 380 switch(this_leaf.eax.split.level) {
363 case 1: 381 case 1:
@@ -506,17 +524,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
506 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 524 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
507 525
508 if (num_threads_sharing == 1) 526 if (num_threads_sharing == 1)
509 cpu_set(cpu, this_leaf->shared_cpu_map); 527 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
510 else { 528 else {
511 index_msb = get_count_order(num_threads_sharing); 529 index_msb = get_count_order(num_threads_sharing);
512 530
513 for_each_online_cpu(i) { 531 for_each_online_cpu(i) {
514 if (cpu_data(i).apicid >> index_msb == 532 if (cpu_data(i).apicid >> index_msb ==
515 c->apicid >> index_msb) { 533 c->apicid >> index_msb) {
516 cpu_set(i, this_leaf->shared_cpu_map); 534 cpumask_set_cpu(i,
535 to_cpumask(this_leaf->shared_cpu_map));
517 if (i != cpu && per_cpu(cpuid4_info, i)) { 536 if (i != cpu && per_cpu(cpuid4_info, i)) {
518 sibling_leaf = CPUID4_INFO_IDX(i, index); 537 sibling_leaf =
519 cpu_set(cpu, sibling_leaf->shared_cpu_map); 538 CPUID4_INFO_IDX(i, index);
539 cpumask_set_cpu(cpu, to_cpumask(
540 sibling_leaf->shared_cpu_map));
520 } 541 }
521 } 542 }
522 } 543 }
@@ -528,9 +549,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
528 int sibling; 549 int sibling;
529 550
530 this_leaf = CPUID4_INFO_IDX(cpu, index); 551 this_leaf = CPUID4_INFO_IDX(cpu, index);
531 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { 552 for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
532 sibling_leaf = CPUID4_INFO_IDX(sibling, index); 553 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
533 cpu_clear(cpu, sibling_leaf->shared_cpu_map); 554 cpumask_clear_cpu(cpu,
555 to_cpumask(sibling_leaf->shared_cpu_map));
534 } 556 }
535} 557}
536#else 558#else
@@ -635,8 +657,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
635 int n = 0; 657 int n = 0;
636 658
637 if (len > 1) { 659 if (len > 1) {
638 cpumask_t *mask = &this_leaf->shared_cpu_map; 660 const struct cpumask *mask;
639 661
662 mask = to_cpumask(this_leaf->shared_cpu_map);
640 n = type? 663 n = type?
641 cpulist_scnprintf(buf, len-2, mask) : 664 cpulist_scnprintf(buf, len-2, mask) :
642 cpumask_scnprintf(buf, len-2, mask); 665 cpumask_scnprintf(buf, len-2, mask);
@@ -699,7 +722,8 @@ static struct pci_dev *get_k8_northbridge(int node)
699 722
700static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) 723static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
701{ 724{
702 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 725 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
726 int node = cpu_to_node(cpumask_first(mask));
703 struct pci_dev *dev = NULL; 727 struct pci_dev *dev = NULL;
704 ssize_t ret = 0; 728 ssize_t ret = 0;
705 int i; 729 int i;
@@ -733,7 +757,8 @@ static ssize_t
733store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, 757store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
734 size_t count) 758 size_t count)
735{ 759{
736 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 760 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
761 int node = cpu_to_node(cpumask_first(mask));
737 struct pci_dev *dev = NULL; 762 struct pci_dev *dev = NULL;
738 unsigned int ret, index, val; 763 unsigned int ret, index, val;
739 764
@@ -878,7 +903,7 @@ err_out:
878 return -ENOMEM; 903 return -ENOMEM;
879} 904}
880 905
881static cpumask_t cache_dev_map = CPU_MASK_NONE; 906static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
882 907
883/* Add/Remove cache interface for CPU device */ 908/* Add/Remove cache interface for CPU device */
884static int __cpuinit cache_add_dev(struct sys_device * sys_dev) 909static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
@@ -918,7 +943,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
918 } 943 }
919 kobject_uevent(&(this_object->kobj), KOBJ_ADD); 944 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
920 } 945 }
921 cpu_set(cpu, cache_dev_map); 946 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
922 947
923 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); 948 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
924 return 0; 949 return 0;
@@ -931,9 +956,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
931 956
932 if (per_cpu(cpuid4_info, cpu) == NULL) 957 if (per_cpu(cpuid4_info, cpu) == NULL)
933 return; 958 return;
934 if (!cpu_isset(cpu, cache_dev_map)) 959 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
935 return; 960 return;
936 cpu_clear(cpu, cache_dev_map); 961 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
937 962
938 for (i = 0; i < num_cache_leaves; i++) 963 for (i = 0; i < num_cache_leaves; i++)
939 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); 964 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index d7d2323bbb6..b2f89829bbe 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o
4obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o 4obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o
5obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o 5obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
6obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o 6obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
7obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index dfaebce3633..3552119b091 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -60,20 +60,6 @@ void mcheck_init(struct cpuinfo_x86 *c)
60 } 60 }
61} 61}
62 62
63static unsigned long old_cr4 __initdata;
64
65void __init stop_mce(void)
66{
67 old_cr4 = read_cr4();
68 clear_in_cr4(X86_CR4_MCE);
69}
70
71void __init restart_mce(void)
72{
73 if (old_cr4 & X86_CR4_MCE)
74 set_in_cr4(X86_CR4_MCE);
75}
76
77static int __init mcheck_disable(char *str) 63static int __init mcheck_disable(char *str)
78{ 64{
79 mce_disabled = 1; 65 mce_disabled = 1;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index fe79985ce0f..ca14604611e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -3,6 +3,8 @@
3 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. 3 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
4 * Rest from unknown author(s). 4 * Rest from unknown author(s).
5 * 2004 Andi Kleen. Rewrote most of it. 5 * 2004 Andi Kleen. Rewrote most of it.
6 * Copyright 2008 Intel Corporation
7 * Author: Andi Kleen
6 */ 8 */
7 9
8#include <linux/init.h> 10#include <linux/init.h>
@@ -24,6 +26,9 @@
24#include <linux/ctype.h> 26#include <linux/ctype.h>
25#include <linux/kmod.h> 27#include <linux/kmod.h>
26#include <linux/kdebug.h> 28#include <linux/kdebug.h>
29#include <linux/kobject.h>
30#include <linux/sysfs.h>
31#include <linux/ratelimit.h>
27#include <asm/processor.h> 32#include <asm/processor.h>
28#include <asm/msr.h> 33#include <asm/msr.h>
29#include <asm/mce.h> 34#include <asm/mce.h>
@@ -32,7 +37,6 @@
32#include <asm/idle.h> 37#include <asm/idle.h>
33 38
34#define MISC_MCELOG_MINOR 227 39#define MISC_MCELOG_MINOR 227
35#define NR_SYSFS_BANKS 6
36 40
37atomic_t mce_entry; 41atomic_t mce_entry;
38 42
@@ -47,7 +51,7 @@ static int mce_dont_init;
47 */ 51 */
48static int tolerant = 1; 52static int tolerant = 1;
49static int banks; 53static int banks;
50static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; 54static u64 *bank;
51static unsigned long notify_user; 55static unsigned long notify_user;
52static int rip_msr; 56static int rip_msr;
53static int mce_bootlog = -1; 57static int mce_bootlog = -1;
@@ -58,6 +62,19 @@ static char *trigger_argv[2] = { trigger, NULL };
58 62
59static DECLARE_WAIT_QUEUE_HEAD(mce_wait); 63static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
60 64
65/* MCA banks polled by the period polling timer for corrected events */
66DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
67 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
68};
69
70/* Do initial initialization of a struct mce */
71void mce_setup(struct mce *m)
72{
73 memset(m, 0, sizeof(struct mce));
74 m->cpu = smp_processor_id();
75 rdtscll(m->tsc);
76}
77
61/* 78/*
62 * Lockless MCE logging infrastructure. 79 * Lockless MCE logging infrastructure.
63 * This avoids deadlocks on printk locks without having to break locks. Also 80 * This avoids deadlocks on printk locks without having to break locks. Also
@@ -119,11 +136,11 @@ static void print_mce(struct mce *m)
119 print_symbol("{%s}", m->ip); 136 print_symbol("{%s}", m->ip);
120 printk("\n"); 137 printk("\n");
121 } 138 }
122 printk(KERN_EMERG "TSC %Lx ", m->tsc); 139 printk(KERN_EMERG "TSC %llx ", m->tsc);
123 if (m->addr) 140 if (m->addr)
124 printk("ADDR %Lx ", m->addr); 141 printk("ADDR %llx ", m->addr);
125 if (m->misc) 142 if (m->misc)
126 printk("MISC %Lx ", m->misc); 143 printk("MISC %llx ", m->misc);
127 printk("\n"); 144 printk("\n");
128 printk(KERN_EMERG "This is not a software problem!\n"); 145 printk(KERN_EMERG "This is not a software problem!\n");
129 printk(KERN_EMERG "Run through mcelog --ascii to decode " 146 printk(KERN_EMERG "Run through mcelog --ascii to decode "
@@ -149,8 +166,10 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start)
149 panic(msg); 166 panic(msg);
150} 167}
151 168
152static int mce_available(struct cpuinfo_x86 *c) 169int mce_available(struct cpuinfo_x86 *c)
153{ 170{
171 if (mce_dont_init)
172 return 0;
154 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); 173 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
155} 174}
156 175
@@ -172,7 +191,77 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
172} 191}
173 192
174/* 193/*
175 * The actual machine check handler 194 * Poll for corrected events or events that happened before reset.
195 * Those are just logged through /dev/mcelog.
196 *
197 * This is executed in standard interrupt context.
198 */
199void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
200{
201 struct mce m;
202 int i;
203
204 mce_setup(&m);
205
206 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
207 for (i = 0; i < banks; i++) {
208 if (!bank[i] || !test_bit(i, *b))
209 continue;
210
211 m.misc = 0;
212 m.addr = 0;
213 m.bank = i;
214 m.tsc = 0;
215
216 barrier();
217 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
218 if (!(m.status & MCI_STATUS_VAL))
219 continue;
220
221 /*
222 * Uncorrected events are handled by the exception handler
223 * when it is enabled. But when the exception is disabled log
224 * everything.
225 *
226 * TBD do the same check for MCI_STATUS_EN here?
227 */
228 if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
229 continue;
230
231 if (m.status & MCI_STATUS_MISCV)
232 rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc);
233 if (m.status & MCI_STATUS_ADDRV)
234 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
235
236 if (!(flags & MCP_TIMESTAMP))
237 m.tsc = 0;
238 /*
239 * Don't get the IP here because it's unlikely to
240 * have anything to do with the actual error location.
241 */
242
243 mce_log(&m);
244 add_taint(TAINT_MACHINE_CHECK);
245
246 /*
247 * Clear state for this bank.
248 */
249 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
250 }
251
252 /*
253 * Don't clear MCG_STATUS here because it's only defined for
254 * exceptions.
255 */
256}
257
258/*
259 * The actual machine check handler. This only handles real
260 * exceptions when something got corrupted coming in through int 18.
261 *
262 * This is executed in NMI context not subject to normal locking rules. This
263 * implies that most kernel services cannot be safely used. Don't even
264 * think about putting a printk in there!
176 */ 265 */
177void do_machine_check(struct pt_regs * regs, long error_code) 266void do_machine_check(struct pt_regs * regs, long error_code)
178{ 267{
@@ -190,17 +279,18 @@ void do_machine_check(struct pt_regs * regs, long error_code)
190 * error. 279 * error.
191 */ 280 */
192 int kill_it = 0; 281 int kill_it = 0;
282 DECLARE_BITMAP(toclear, MAX_NR_BANKS);
193 283
194 atomic_inc(&mce_entry); 284 atomic_inc(&mce_entry);
195 285
196 if ((regs 286 if (notify_die(DIE_NMI, "machine check", regs, error_code,
197 && notify_die(DIE_NMI, "machine check", regs, error_code,
198 18, SIGKILL) == NOTIFY_STOP) 287 18, SIGKILL) == NOTIFY_STOP)
199 || !banks) 288 goto out2;
289 if (!banks)
200 goto out2; 290 goto out2;
201 291
202 memset(&m, 0, sizeof(struct mce)); 292 mce_setup(&m);
203 m.cpu = smp_processor_id(); 293
204 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); 294 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
205 /* if the restart IP is not valid, we're done for */ 295 /* if the restart IP is not valid, we're done for */
206 if (!(m.mcgstatus & MCG_STATUS_RIPV)) 296 if (!(m.mcgstatus & MCG_STATUS_RIPV))
@@ -210,18 +300,32 @@ void do_machine_check(struct pt_regs * regs, long error_code)
210 barrier(); 300 barrier();
211 301
212 for (i = 0; i < banks; i++) { 302 for (i = 0; i < banks; i++) {
213 if (i < NR_SYSFS_BANKS && !bank[i]) 303 __clear_bit(i, toclear);
304 if (!bank[i])
214 continue; 305 continue;
215 306
216 m.misc = 0; 307 m.misc = 0;
217 m.addr = 0; 308 m.addr = 0;
218 m.bank = i; 309 m.bank = i;
219 m.tsc = 0;
220 310
221 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); 311 rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status);
222 if ((m.status & MCI_STATUS_VAL) == 0) 312 if ((m.status & MCI_STATUS_VAL) == 0)
223 continue; 313 continue;
224 314
315 /*
316 * Non uncorrected errors are handled by machine_check_poll
317 * Leave them alone.
318 */
319 if ((m.status & MCI_STATUS_UC) == 0)
320 continue;
321
322 /*
323 * Set taint even when machine check was not enabled.
324 */
325 add_taint(TAINT_MACHINE_CHECK);
326
327 __set_bit(i, toclear);
328
225 if (m.status & MCI_STATUS_EN) { 329 if (m.status & MCI_STATUS_EN) {
226 /* if PCC was set, there's no way out */ 330 /* if PCC was set, there's no way out */
227 no_way_out |= !!(m.status & MCI_STATUS_PCC); 331 no_way_out |= !!(m.status & MCI_STATUS_PCC);
@@ -235,6 +339,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
235 no_way_out = 1; 339 no_way_out = 1;
236 kill_it = 1; 340 kill_it = 1;
237 } 341 }
342 } else {
343 /*
344 * Machine check event was not enabled. Clear, but
345 * ignore.
346 */
347 continue;
238 } 348 }
239 349
240 if (m.status & MCI_STATUS_MISCV) 350 if (m.status & MCI_STATUS_MISCV)
@@ -243,10 +353,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
243 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); 353 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
244 354
245 mce_get_rip(&m, regs); 355 mce_get_rip(&m, regs);
246 if (error_code >= 0) 356 mce_log(&m);
247 rdtscll(m.tsc);
248 if (error_code != -2)
249 mce_log(&m);
250 357
251 /* Did this bank cause the exception? */ 358 /* Did this bank cause the exception? */
252 /* Assume that the bank with uncorrectable errors did it, 359 /* Assume that the bank with uncorrectable errors did it,
@@ -255,14 +362,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
255 panicm = m; 362 panicm = m;
256 panicm_found = 1; 363 panicm_found = 1;
257 } 364 }
258
259 add_taint(TAINT_MACHINE_CHECK);
260 } 365 }
261 366
262 /* Never do anything final in the polling timer */
263 if (!regs)
264 goto out;
265
266 /* If we didn't find an uncorrectable error, pick 367 /* If we didn't find an uncorrectable error, pick
267 the last one (shouldn't happen, just being safe). */ 368 the last one (shouldn't happen, just being safe). */
268 if (!panicm_found) 369 if (!panicm_found)
@@ -309,10 +410,11 @@ void do_machine_check(struct pt_regs * regs, long error_code)
309 /* notify userspace ASAP */ 410 /* notify userspace ASAP */
310 set_thread_flag(TIF_MCE_NOTIFY); 411 set_thread_flag(TIF_MCE_NOTIFY);
311 412
312 out:
313 /* the last thing we do is clear state */ 413 /* the last thing we do is clear state */
314 for (i = 0; i < banks; i++) 414 for (i = 0; i < banks; i++) {
315 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 415 if (test_bit(i, toclear))
416 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
417 }
316 wrmsrl(MSR_IA32_MCG_STATUS, 0); 418 wrmsrl(MSR_IA32_MCG_STATUS, 0);
317 out2: 419 out2:
318 atomic_dec(&mce_entry); 420 atomic_dec(&mce_entry);
@@ -332,15 +434,13 @@ void do_machine_check(struct pt_regs * regs, long error_code)
332 * and historically has been the register value of the 434 * and historically has been the register value of the
333 * MSR_IA32_THERMAL_STATUS (Intel) msr. 435 * MSR_IA32_THERMAL_STATUS (Intel) msr.
334 */ 436 */
335void mce_log_therm_throt_event(unsigned int cpu, __u64 status) 437void mce_log_therm_throt_event(__u64 status)
336{ 438{
337 struct mce m; 439 struct mce m;
338 440
339 memset(&m, 0, sizeof(m)); 441 mce_setup(&m);
340 m.cpu = cpu;
341 m.bank = MCE_THERMAL_BANK; 442 m.bank = MCE_THERMAL_BANK;
342 m.status = status; 443 m.status = status;
343 rdtscll(m.tsc);
344 mce_log(&m); 444 mce_log(&m);
345} 445}
346#endif /* CONFIG_X86_MCE_INTEL */ 446#endif /* CONFIG_X86_MCE_INTEL */
@@ -353,18 +453,18 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status)
353 453
354static int check_interval = 5 * 60; /* 5 minutes */ 454static int check_interval = 5 * 60; /* 5 minutes */
355static int next_interval; /* in jiffies */ 455static int next_interval; /* in jiffies */
356static void mcheck_timer(struct work_struct *work); 456static void mcheck_timer(unsigned long);
357static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); 457static DEFINE_PER_CPU(struct timer_list, mce_timer);
358 458
359static void mcheck_check_cpu(void *info) 459static void mcheck_timer(unsigned long data)
360{ 460{
361 if (mce_available(&current_cpu_data)) 461 struct timer_list *t = &per_cpu(mce_timer, data);
362 do_machine_check(NULL, 0);
363}
364 462
365static void mcheck_timer(struct work_struct *work) 463 WARN_ON(smp_processor_id() != data);
366{ 464
367 on_each_cpu(mcheck_check_cpu, NULL, 1); 465 if (mce_available(&current_cpu_data))
466 machine_check_poll(MCP_TIMESTAMP,
467 &__get_cpu_var(mce_poll_banks));
368 468
369 /* 469 /*
370 * Alert userspace if needed. If we logged an MCE, reduce the 470 * Alert userspace if needed. If we logged an MCE, reduce the
@@ -377,31 +477,41 @@ static void mcheck_timer(struct work_struct *work)
377 (int)round_jiffies_relative(check_interval*HZ)); 477 (int)round_jiffies_relative(check_interval*HZ));
378 } 478 }
379 479
380 schedule_delayed_work(&mcheck_work, next_interval); 480 t->expires = jiffies + next_interval;
481 add_timer(t);
482}
483
484static void mce_do_trigger(struct work_struct *work)
485{
486 call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
381} 487}
382 488
489static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
490
383/* 491/*
384 * This is only called from process context. This is where we do 492 * Notify the user(s) about new machine check events.
385 * anything we need to alert userspace about new MCEs. This is called 493 * Can be called from interrupt context, but not from machine check/NMI
386 * directly from the poller and also from entry.S and idle, thanks to 494 * context.
387 * TIF_MCE_NOTIFY.
388 */ 495 */
389int mce_notify_user(void) 496int mce_notify_user(void)
390{ 497{
498 /* Not more than two messages every minute */
499 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
500
391 clear_thread_flag(TIF_MCE_NOTIFY); 501 clear_thread_flag(TIF_MCE_NOTIFY);
392 if (test_and_clear_bit(0, &notify_user)) { 502 if (test_and_clear_bit(0, &notify_user)) {
393 static unsigned long last_print;
394 unsigned long now = jiffies;
395
396 wake_up_interruptible(&mce_wait); 503 wake_up_interruptible(&mce_wait);
397 if (trigger[0])
398 call_usermodehelper(trigger, trigger_argv, NULL,
399 UMH_NO_WAIT);
400 504
401 if (time_after_eq(now, last_print + (check_interval*HZ))) { 505 /*
402 last_print = now; 506 * There is no risk of missing notifications because
507 * work_pending is always cleared before the function is
508 * executed.
509 */
510 if (trigger[0] && !work_pending(&mce_trigger_work))
511 schedule_work(&mce_trigger_work);
512
513 if (__ratelimit(&ratelimit))
403 printk(KERN_INFO "Machine check events logged\n"); 514 printk(KERN_INFO "Machine check events logged\n");
404 }
405 515
406 return 1; 516 return 1;
407 } 517 }
@@ -425,63 +535,78 @@ static struct notifier_block mce_idle_notifier = {
425 535
426static __init int periodic_mcheck_init(void) 536static __init int periodic_mcheck_init(void)
427{ 537{
428 next_interval = check_interval * HZ; 538 idle_notifier_register(&mce_idle_notifier);
429 if (next_interval) 539 return 0;
430 schedule_delayed_work(&mcheck_work,
431 round_jiffies_relative(next_interval));
432 idle_notifier_register(&mce_idle_notifier);
433 return 0;
434} 540}
435__initcall(periodic_mcheck_init); 541__initcall(periodic_mcheck_init);
436 542
437
438/* 543/*
439 * Initialize Machine Checks for a CPU. 544 * Initialize Machine Checks for a CPU.
440 */ 545 */
441static void mce_init(void *dummy) 546static int mce_cap_init(void)
442{ 547{
443 u64 cap; 548 u64 cap;
444 int i; 549 unsigned b;
445 550
446 rdmsrl(MSR_IA32_MCG_CAP, cap); 551 rdmsrl(MSR_IA32_MCG_CAP, cap);
447 banks = cap & 0xff; 552 b = cap & 0xff;
448 if (banks > MCE_EXTENDED_BANK) { 553 if (b > MAX_NR_BANKS) {
449 banks = MCE_EXTENDED_BANK; 554 printk(KERN_WARNING
450 printk(KERN_INFO "MCE: warning: using only %d banks\n", 555 "MCE: Using only %u machine check banks out of %u\n",
451 MCE_EXTENDED_BANK); 556 MAX_NR_BANKS, b);
557 b = MAX_NR_BANKS;
452 } 558 }
559
560 /* Don't support asymmetric configurations today */
561 WARN_ON(banks != 0 && b != banks);
562 banks = b;
563 if (!bank) {
564 bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
565 if (!bank)
566 return -ENOMEM;
567 memset(bank, 0xff, banks * sizeof(u64));
568 }
569
453 /* Use accurate RIP reporting if available. */ 570 /* Use accurate RIP reporting if available. */
454 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) 571 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
455 rip_msr = MSR_IA32_MCG_EIP; 572 rip_msr = MSR_IA32_MCG_EIP;
456 573
457 /* Log the machine checks left over from the previous reset. 574 return 0;
458 This also clears all registers */ 575}
459 do_machine_check(NULL, mce_bootlog ? -1 : -2); 576
577static void mce_init(void *dummy)
578{
579 u64 cap;
580 int i;
581 mce_banks_t all_banks;
582
583 /*
584 * Log the machine checks left over from the previous reset.
585 */
586 bitmap_fill(all_banks, MAX_NR_BANKS);
587 machine_check_poll(MCP_UC, &all_banks);
460 588
461 set_in_cr4(X86_CR4_MCE); 589 set_in_cr4(X86_CR4_MCE);
462 590
591 rdmsrl(MSR_IA32_MCG_CAP, cap);
463 if (cap & MCG_CTL_P) 592 if (cap & MCG_CTL_P)
464 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 593 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
465 594
466 for (i = 0; i < banks; i++) { 595 for (i = 0; i < banks; i++) {
467 if (i < NR_SYSFS_BANKS) 596 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
468 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
469 else
470 wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
471
472 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 597 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
473 } 598 }
474} 599}
475 600
476/* Add per CPU specific workarounds here */ 601/* Add per CPU specific workarounds here */
477static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) 602static void mce_cpu_quirks(struct cpuinfo_x86 *c)
478{ 603{
479 /* This should be disabled by the BIOS, but isn't always */ 604 /* This should be disabled by the BIOS, but isn't always */
480 if (c->x86_vendor == X86_VENDOR_AMD) { 605 if (c->x86_vendor == X86_VENDOR_AMD) {
481 if(c->x86 == 15) 606 if (c->x86 == 15 && banks > 4)
482 /* disable GART TBL walk error reporting, which trips off 607 /* disable GART TBL walk error reporting, which trips off
483 incorrectly with the IOMMU & 3ware & Cerberus. */ 608 incorrectly with the IOMMU & 3ware & Cerberus. */
484 clear_bit(10, &bank[4]); 609 clear_bit(10, (unsigned long *)&bank[4]);
485 if(c->x86 <= 17 && mce_bootlog < 0) 610 if(c->x86 <= 17 && mce_bootlog < 0)
486 /* Lots of broken BIOS around that don't clear them 611 /* Lots of broken BIOS around that don't clear them
487 by default and leave crap in there. Don't log. */ 612 by default and leave crap in there. Don't log. */
@@ -504,20 +629,38 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
504 } 629 }
505} 630}
506 631
632static void mce_init_timer(void)
633{
634 struct timer_list *t = &__get_cpu_var(mce_timer);
635
636 /* data race harmless because everyone sets to the same value */
637 if (!next_interval)
638 next_interval = check_interval * HZ;
639 if (!next_interval)
640 return;
641 setup_timer(t, mcheck_timer, smp_processor_id());
642 t->expires = round_jiffies(jiffies + next_interval);
643 add_timer(t);
644}
645
507/* 646/*
508 * Called for each booted CPU to set up machine checks. 647 * Called for each booted CPU to set up machine checks.
509 * Must be called with preempt off. 648 * Must be called with preempt off.
510 */ 649 */
511void __cpuinit mcheck_init(struct cpuinfo_x86 *c) 650void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
512{ 651{
513 mce_cpu_quirks(c); 652 if (!mce_available(c))
653 return;
514 654
515 if (mce_dont_init || 655 if (mce_cap_init() < 0) {
516 !mce_available(c)) 656 mce_dont_init = 1;
517 return; 657 return;
658 }
659 mce_cpu_quirks(c);
518 660
519 mce_init(NULL); 661 mce_init(NULL);
520 mce_cpu_features(c); 662 mce_cpu_features(c);
663 mce_init_timer();
521} 664}
522 665
523/* 666/*
@@ -573,7 +716,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
573{ 716{
574 unsigned long *cpu_tsc; 717 unsigned long *cpu_tsc;
575 static DEFINE_MUTEX(mce_read_mutex); 718 static DEFINE_MUTEX(mce_read_mutex);
576 unsigned next; 719 unsigned prev, next;
577 char __user *buf = ubuf; 720 char __user *buf = ubuf;
578 int i, err; 721 int i, err;
579 722
@@ -592,25 +735,32 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
592 } 735 }
593 736
594 err = 0; 737 err = 0;
595 for (i = 0; i < next; i++) { 738 prev = 0;
596 unsigned long start = jiffies; 739 do {
597 740 for (i = prev; i < next; i++) {
598 while (!mcelog.entry[i].finished) { 741 unsigned long start = jiffies;
599 if (time_after_eq(jiffies, start + 2)) { 742
600 memset(mcelog.entry + i,0, sizeof(struct mce)); 743 while (!mcelog.entry[i].finished) {
601 goto timeout; 744 if (time_after_eq(jiffies, start + 2)) {
745 memset(mcelog.entry + i, 0,
746 sizeof(struct mce));
747 goto timeout;
748 }
749 cpu_relax();
602 } 750 }
603 cpu_relax(); 751 smp_rmb();
752 err |= copy_to_user(buf, mcelog.entry + i,
753 sizeof(struct mce));
754 buf += sizeof(struct mce);
755timeout:
756 ;
604 } 757 }
605 smp_rmb();
606 err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
607 buf += sizeof(struct mce);
608 timeout:
609 ;
610 }
611 758
612 memset(mcelog.entry, 0, next * sizeof(struct mce)); 759 memset(mcelog.entry + prev, 0,
613 mcelog.next = 0; 760 (next - prev) * sizeof(struct mce));
761 prev = next;
762 next = cmpxchg(&mcelog.next, prev, 0);
763 } while (next != prev);
614 764
615 synchronize_sched(); 765 synchronize_sched();
616 766
@@ -680,20 +830,6 @@ static struct miscdevice mce_log_device = {
680 &mce_chrdev_ops, 830 &mce_chrdev_ops,
681}; 831};
682 832
683static unsigned long old_cr4 __initdata;
684
685void __init stop_mce(void)
686{
687 old_cr4 = read_cr4();
688 clear_in_cr4(X86_CR4_MCE);
689}
690
691void __init restart_mce(void)
692{
693 if (old_cr4 & X86_CR4_MCE)
694 set_in_cr4(X86_CR4_MCE);
695}
696
697/* 833/*
698 * Old style boot options parsing. Only for compatibility. 834 * Old style boot options parsing. Only for compatibility.
699 */ 835 */
@@ -703,8 +839,7 @@ static int __init mcheck_disable(char *str)
703 return 1; 839 return 1;
704} 840}
705 841
706/* mce=off disables machine check. Note you can re-enable it later 842/* mce=off disables machine check.
707 using sysfs.
708 mce=TOLERANCELEVEL (number, see above) 843 mce=TOLERANCELEVEL (number, see above)
709 mce=bootlog Log MCEs from before booting. Disabled by default on AMD. 844 mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
710 mce=nobootlog Don't log MCEs from before booting. */ 845 mce=nobootlog Don't log MCEs from before booting. */
@@ -728,6 +863,29 @@ __setup("mce=", mcheck_enable);
728 * Sysfs support 863 * Sysfs support
729 */ 864 */
730 865
866/*
867 * Disable machine checks on suspend and shutdown. We can't really handle
868 * them later.
869 */
870static int mce_disable(void)
871{
872 int i;
873
874 for (i = 0; i < banks; i++)
875 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
876 return 0;
877}
878
879static int mce_suspend(struct sys_device *dev, pm_message_t state)
880{
881 return mce_disable();
882}
883
884static int mce_shutdown(struct sys_device *dev)
885{
886 return mce_disable();
887}
888
731/* On resume clear all MCE state. Don't want to see leftovers from the BIOS. 889/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
732 Only one CPU is active at this time, the others get readded later using 890 Only one CPU is active at this time, the others get readded later using
733 CPU hotplug. */ 891 CPU hotplug. */
@@ -738,20 +896,24 @@ static int mce_resume(struct sys_device *dev)
738 return 0; 896 return 0;
739} 897}
740 898
899static void mce_cpu_restart(void *data)
900{
901 del_timer_sync(&__get_cpu_var(mce_timer));
902 if (mce_available(&current_cpu_data))
903 mce_init(NULL);
904 mce_init_timer();
905}
906
741/* Reinit MCEs after user configuration changes */ 907/* Reinit MCEs after user configuration changes */
742static void mce_restart(void) 908static void mce_restart(void)
743{ 909{
744 if (next_interval)
745 cancel_delayed_work(&mcheck_work);
746 /* Timer race is harmless here */
747 on_each_cpu(mce_init, NULL, 1);
748 next_interval = check_interval * HZ; 910 next_interval = check_interval * HZ;
749 if (next_interval) 911 on_each_cpu(mce_cpu_restart, NULL, 1);
750 schedule_delayed_work(&mcheck_work,
751 round_jiffies_relative(next_interval));
752} 912}
753 913
754static struct sysdev_class mce_sysclass = { 914static struct sysdev_class mce_sysclass = {
915 .suspend = mce_suspend,
916 .shutdown = mce_shutdown,
755 .resume = mce_resume, 917 .resume = mce_resume,
756 .name = "machinecheck", 918 .name = "machinecheck",
757}; 919};
@@ -778,16 +940,26 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinit
778 } \ 940 } \
779 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); 941 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
780 942
781/* 943static struct sysdev_attribute *bank_attrs;
782 * TBD should generate these dynamically based on number of available banks. 944
783 * Have only 6 contol banks in /sysfs until then. 945static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
784 */ 946 char *buf)
785ACCESSOR(bank0ctl,bank[0],mce_restart()) 947{
786ACCESSOR(bank1ctl,bank[1],mce_restart()) 948 u64 b = bank[attr - bank_attrs];
787ACCESSOR(bank2ctl,bank[2],mce_restart()) 949 return sprintf(buf, "%llx\n", b);
788ACCESSOR(bank3ctl,bank[3],mce_restart()) 950}
789ACCESSOR(bank4ctl,bank[4],mce_restart()) 951
790ACCESSOR(bank5ctl,bank[5],mce_restart()) 952static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
953 const char *buf, size_t siz)
954{
955 char *end;
956 u64 new = simple_strtoull(buf, &end, 0);
957 if (end == buf)
958 return -EINVAL;
959 bank[attr - bank_attrs] = new;
960 mce_restart();
961 return end-buf;
962}
791 963
792static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, 964static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
793 char *buf) 965 char *buf)
@@ -814,8 +986,6 @@ static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
814static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); 986static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
815ACCESSOR(check_interval,check_interval,mce_restart()) 987ACCESSOR(check_interval,check_interval,mce_restart())
816static struct sysdev_attribute *mce_attributes[] = { 988static struct sysdev_attribute *mce_attributes[] = {
817 &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
818 &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
819 &attr_tolerant.attr, &attr_check_interval, &attr_trigger, 989 &attr_tolerant.attr, &attr_check_interval, &attr_trigger,
820 NULL 990 NULL
821}; 991};
@@ -845,11 +1015,22 @@ static __cpuinit int mce_create_device(unsigned int cpu)
845 if (err) 1015 if (err)
846 goto error; 1016 goto error;
847 } 1017 }
1018 for (i = 0; i < banks; i++) {
1019 err = sysdev_create_file(&per_cpu(device_mce, cpu),
1020 &bank_attrs[i]);
1021 if (err)
1022 goto error2;
1023 }
848 cpu_set(cpu, mce_device_initialized); 1024 cpu_set(cpu, mce_device_initialized);
849 1025
850 return 0; 1026 return 0;
1027error2:
1028 while (--i >= 0) {
1029 sysdev_remove_file(&per_cpu(device_mce, cpu),
1030 &bank_attrs[i]);
1031 }
851error: 1032error:
852 while (i--) { 1033 while (--i >= 0) {
853 sysdev_remove_file(&per_cpu(device_mce,cpu), 1034 sysdev_remove_file(&per_cpu(device_mce,cpu),
854 mce_attributes[i]); 1035 mce_attributes[i]);
855 } 1036 }
@@ -868,15 +1049,46 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
868 for (i = 0; mce_attributes[i]; i++) 1049 for (i = 0; mce_attributes[i]; i++)
869 sysdev_remove_file(&per_cpu(device_mce,cpu), 1050 sysdev_remove_file(&per_cpu(device_mce,cpu),
870 mce_attributes[i]); 1051 mce_attributes[i]);
1052 for (i = 0; i < banks; i++)
1053 sysdev_remove_file(&per_cpu(device_mce, cpu),
1054 &bank_attrs[i]);
871 sysdev_unregister(&per_cpu(device_mce,cpu)); 1055 sysdev_unregister(&per_cpu(device_mce,cpu));
872 cpu_clear(cpu, mce_device_initialized); 1056 cpu_clear(cpu, mce_device_initialized);
873} 1057}
874 1058
1059/* Make sure there are no machine checks on offlined CPUs. */
1060static void mce_disable_cpu(void *h)
1061{
1062 int i;
1063 unsigned long action = *(unsigned long *)h;
1064
1065 if (!mce_available(&current_cpu_data))
1066 return;
1067 if (!(action & CPU_TASKS_FROZEN))
1068 cmci_clear();
1069 for (i = 0; i < banks; i++)
1070 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
1071}
1072
1073static void mce_reenable_cpu(void *h)
1074{
1075 int i;
1076 unsigned long action = *(unsigned long *)h;
1077
1078 if (!mce_available(&current_cpu_data))
1079 return;
1080 if (!(action & CPU_TASKS_FROZEN))
1081 cmci_reenable();
1082 for (i = 0; i < banks; i++)
1083 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
1084}
1085
875/* Get notified when a cpu comes on/off. Be hotplug friendly. */ 1086/* Get notified when a cpu comes on/off. Be hotplug friendly. */
876static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, 1087static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
877 unsigned long action, void *hcpu) 1088 unsigned long action, void *hcpu)
878{ 1089{
879 unsigned int cpu = (unsigned long)hcpu; 1090 unsigned int cpu = (unsigned long)hcpu;
1091 struct timer_list *t = &per_cpu(mce_timer, cpu);
880 1092
881 switch (action) { 1093 switch (action) {
882 case CPU_ONLINE: 1094 case CPU_ONLINE:
@@ -891,6 +1103,21 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
891 threshold_cpu_callback(action, cpu); 1103 threshold_cpu_callback(action, cpu);
892 mce_remove_device(cpu); 1104 mce_remove_device(cpu);
893 break; 1105 break;
1106 case CPU_DOWN_PREPARE:
1107 case CPU_DOWN_PREPARE_FROZEN:
1108 del_timer_sync(t);
1109 smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
1110 break;
1111 case CPU_DOWN_FAILED:
1112 case CPU_DOWN_FAILED_FROZEN:
1113 t->expires = round_jiffies(jiffies + next_interval);
1114 add_timer_on(t, cpu);
1115 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
1116 break;
1117 case CPU_POST_DEAD:
1118 /* intentionally ignoring frozen here */
1119 cmci_rediscover(cpu);
1120 break;
894 } 1121 }
895 return NOTIFY_OK; 1122 return NOTIFY_OK;
896} 1123}
@@ -899,6 +1126,34 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
899 .notifier_call = mce_cpu_callback, 1126 .notifier_call = mce_cpu_callback,
900}; 1127};
901 1128
1129static __init int mce_init_banks(void)
1130{
1131 int i;
1132
1133 bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
1134 GFP_KERNEL);
1135 if (!bank_attrs)
1136 return -ENOMEM;
1137
1138 for (i = 0; i < banks; i++) {
1139 struct sysdev_attribute *a = &bank_attrs[i];
1140 a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
1141 if (!a->attr.name)
1142 goto nomem;
1143 a->attr.mode = 0644;
1144 a->show = show_bank;
1145 a->store = set_bank;
1146 }
1147 return 0;
1148
1149nomem:
1150 while (--i >= 0)
1151 kfree(bank_attrs[i].attr.name);
1152 kfree(bank_attrs);
1153 bank_attrs = NULL;
1154 return -ENOMEM;
1155}
1156
902static __init int mce_init_device(void) 1157static __init int mce_init_device(void)
903{ 1158{
904 int err; 1159 int err;
@@ -906,6 +1161,11 @@ static __init int mce_init_device(void)
906 1161
907 if (!mce_available(&boot_cpu_data)) 1162 if (!mce_available(&boot_cpu_data))
908 return -EIO; 1163 return -EIO;
1164
1165 err = mce_init_banks();
1166 if (err)
1167 return err;
1168
909 err = sysdev_class_register(&mce_sysclass); 1169 err = sysdev_class_register(&mce_sysclass);
910 if (err) 1170 if (err)
911 return err; 1171 return err;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index f2ee0ae29bd..c5a32f92d07 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = {
67struct threshold_bank { 67struct threshold_bank {
68 struct kobject *kobj; 68 struct kobject *kobj;
69 struct threshold_block *blocks; 69 struct threshold_block *blocks;
70 cpumask_t cpus; 70 cpumask_var_t cpus;
71}; 71};
72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); 72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
73 73
@@ -79,6 +79,8 @@ static unsigned char shared_bank[NR_BANKS] = {
79 79
80static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ 80static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */
81 81
82static void amd_threshold_interrupt(void);
83
82/* 84/*
83 * CPU Initialization 85 * CPU Initialization
84 */ 86 */
@@ -174,6 +176,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
174 tr.reset = 0; 176 tr.reset = 0;
175 tr.old_limit = 0; 177 tr.old_limit = 0;
176 threshold_restart_bank(&tr); 178 threshold_restart_bank(&tr);
179
180 mce_threshold_vector = amd_threshold_interrupt;
177 } 181 }
178 } 182 }
179} 183}
@@ -187,19 +191,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
187 * the interrupt goes off when error_count reaches threshold_limit. 191 * the interrupt goes off when error_count reaches threshold_limit.
188 * the handler will simply log mcelog w/ software defined bank number. 192 * the handler will simply log mcelog w/ software defined bank number.
189 */ 193 */
190asmlinkage void mce_threshold_interrupt(void) 194static void amd_threshold_interrupt(void)
191{ 195{
192 unsigned int bank, block; 196 unsigned int bank, block;
193 struct mce m; 197 struct mce m;
194 u32 low = 0, high = 0, address = 0; 198 u32 low = 0, high = 0, address = 0;
195 199
196 ack_APIC_irq(); 200 mce_setup(&m);
197 exit_idle();
198 irq_enter();
199
200 memset(&m, 0, sizeof(m));
201 rdtscll(m.tsc);
202 m.cpu = smp_processor_id();
203 201
204 /* assume first bank caused it */ 202 /* assume first bank caused it */
205 for (bank = 0; bank < NR_BANKS; ++bank) { 203 for (bank = 0; bank < NR_BANKS; ++bank) {
@@ -233,7 +231,8 @@ asmlinkage void mce_threshold_interrupt(void)
233 231
234 /* Log the machine check that caused the threshold 232 /* Log the machine check that caused the threshold
235 event. */ 233 event. */
236 do_machine_check(NULL, 0); 234 machine_check_poll(MCP_TIMESTAMP,
235 &__get_cpu_var(mce_poll_banks));
237 236
238 if (high & MASK_OVERFLOW_HI) { 237 if (high & MASK_OVERFLOW_HI) {
239 rdmsrl(address, m.misc); 238 rdmsrl(address, m.misc);
@@ -243,13 +242,10 @@ asmlinkage void mce_threshold_interrupt(void)
243 + bank * NR_BLOCKS 242 + bank * NR_BLOCKS
244 + block; 243 + block;
245 mce_log(&m); 244 mce_log(&m);
246 goto out; 245 return;
247 } 246 }
248 } 247 }
249 } 248 }
250out:
251 inc_irq_stat(irq_threshold_count);
252 irq_exit();
253} 249}
254 250
255/* 251/*
@@ -481,7 +477,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
481 477
482#ifdef CONFIG_SMP 478#ifdef CONFIG_SMP
483 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 479 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
484 i = first_cpu(per_cpu(cpu_core_map, cpu)); 480 i = cpumask_first(&per_cpu(cpu_core_map, cpu));
485 481
486 /* first core not up yet */ 482 /* first core not up yet */
487 if (cpu_data(i).cpu_core_id) 483 if (cpu_data(i).cpu_core_id)
@@ -501,7 +497,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
501 if (err) 497 if (err)
502 goto out; 498 goto out;
503 499
504 b->cpus = per_cpu(cpu_core_map, cpu); 500 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
505 per_cpu(threshold_banks, cpu)[bank] = b; 501 per_cpu(threshold_banks, cpu)[bank] = b;
506 goto out; 502 goto out;
507 } 503 }
@@ -512,15 +508,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
512 err = -ENOMEM; 508 err = -ENOMEM;
513 goto out; 509 goto out;
514 } 510 }
511 if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
512 kfree(b);
513 err = -ENOMEM;
514 goto out;
515 }
515 516
516 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); 517 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
517 if (!b->kobj) 518 if (!b->kobj)
518 goto out_free; 519 goto out_free;
519 520
520#ifndef CONFIG_SMP 521#ifndef CONFIG_SMP
521 b->cpus = CPU_MASK_ALL; 522 cpumask_setall(b->cpus);
522#else 523#else
523 b->cpus = per_cpu(cpu_core_map, cpu); 524 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
524#endif 525#endif
525 526
526 per_cpu(threshold_banks, cpu)[bank] = b; 527 per_cpu(threshold_banks, cpu)[bank] = b;
@@ -529,7 +530,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
529 if (err) 530 if (err)
530 goto out_free; 531 goto out_free;
531 532
532 for_each_cpu_mask_nr(i, b->cpus) { 533 for_each_cpu(i, b->cpus) {
533 if (i == cpu) 534 if (i == cpu)
534 continue; 535 continue;
535 536
@@ -545,6 +546,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
545 546
546out_free: 547out_free:
547 per_cpu(threshold_banks, cpu)[bank] = NULL; 548 per_cpu(threshold_banks, cpu)[bank] = NULL;
549 free_cpumask_var(b->cpus);
548 kfree(b); 550 kfree(b);
549out: 551out:
550 return err; 552 return err;
@@ -619,7 +621,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
619#endif 621#endif
620 622
621 /* remove all sibling symlinks before unregistering */ 623 /* remove all sibling symlinks before unregistering */
622 for_each_cpu_mask_nr(i, b->cpus) { 624 for_each_cpu(i, b->cpus) {
623 if (i == cpu) 625 if (i == cpu)
624 continue; 626 continue;
625 627
@@ -632,6 +634,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
632free_out: 634free_out:
633 kobject_del(b->kobj); 635 kobject_del(b->kobj);
634 kobject_put(b->kobj); 636 kobject_put(b->kobj);
637 free_cpumask_var(b->cpus);
635 kfree(b); 638 kfree(b);
636 per_cpu(threshold_banks, cpu)[bank] = NULL; 639 per_cpu(threshold_banks, cpu)[bank] = NULL;
637} 640}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index f44c3662436..aaa7d973093 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -1,17 +1,21 @@
1/* 1/*
2 * Intel specific MCE features. 2 * Intel specific MCE features.
3 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> 3 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
4 * Copyright (C) 2008, 2009 Intel Corporation
5 * Author: Andi Kleen
4 */ 6 */
5 7
6#include <linux/init.h> 8#include <linux/init.h>
7#include <linux/interrupt.h> 9#include <linux/interrupt.h>
8#include <linux/percpu.h> 10#include <linux/percpu.h>
9#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/apic.h>
10#include <asm/msr.h> 13#include <asm/msr.h>
11#include <asm/mce.h> 14#include <asm/mce.h>
12#include <asm/hw_irq.h> 15#include <asm/hw_irq.h>
13#include <asm/idle.h> 16#include <asm/idle.h>
14#include <asm/therm_throt.h> 17#include <asm/therm_throt.h>
18#include <asm/apic.h>
15 19
16asmlinkage void smp_thermal_interrupt(void) 20asmlinkage void smp_thermal_interrupt(void)
17{ 21{
@@ -24,7 +28,7 @@ asmlinkage void smp_thermal_interrupt(void)
24 28
25 rdmsrl(MSR_IA32_THERM_STATUS, msr_val); 29 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
26 if (therm_throt_process(msr_val & 1)) 30 if (therm_throt_process(msr_val & 1))
27 mce_log_therm_throt_event(smp_processor_id(), msr_val); 31 mce_log_therm_throt_event(msr_val);
28 32
29 inc_irq_stat(irq_thermal_count); 33 inc_irq_stat(irq_thermal_count);
30 irq_exit(); 34 irq_exit();
@@ -48,13 +52,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
48 */ 52 */
49 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 53 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
50 h = apic_read(APIC_LVTTHMR); 54 h = apic_read(APIC_LVTTHMR);
51 if ((l & (1 << 3)) && (h & APIC_DM_SMI)) { 55 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
52 printk(KERN_DEBUG 56 printk(KERN_DEBUG
53 "CPU%d: Thermal monitoring handled by SMI\n", cpu); 57 "CPU%d: Thermal monitoring handled by SMI\n", cpu);
54 return; 58 return;
55 } 59 }
56 60
57 if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) 61 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
58 tm2 = 1; 62 tm2 = 1;
59 63
60 if (h & APIC_VECTOR_MASK) { 64 if (h & APIC_VECTOR_MASK) {
@@ -72,7 +76,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
72 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); 76 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
73 77
74 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 78 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
75 wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); 79 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
76 80
77 l = apic_read(APIC_LVTTHMR); 81 l = apic_read(APIC_LVTTHMR);
78 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 82 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
@@ -84,7 +88,209 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
84 return; 88 return;
85} 89}
86 90
91/*
92 * Support for Intel Correct Machine Check Interrupts. This allows
93 * the CPU to raise an interrupt when a corrected machine check happened.
94 * Normally we pick those up using a regular polling timer.
95 * Also supports reliable discovery of shared banks.
96 */
97
98static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
99
100/*
101 * cmci_discover_lock protects against parallel discovery attempts
102 * which could race against each other.
103 */
104static DEFINE_SPINLOCK(cmci_discover_lock);
105
106#define CMCI_THRESHOLD 1
107
108static int cmci_supported(int *banks)
109{
110 u64 cap;
111
112 /*
113 * Vendor check is not strictly needed, but the initial
114 * initialization is vendor keyed and this
115 * makes sure none of the backdoors are entered otherwise.
116 */
117 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
118 return 0;
119 if (!cpu_has_apic || lapic_get_maxlvt() < 6)
120 return 0;
121 rdmsrl(MSR_IA32_MCG_CAP, cap);
122 *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
123 return !!(cap & MCG_CMCI_P);
124}
125
126/*
127 * The interrupt handler. This is called on every event.
128 * Just call the poller directly to log any events.
129 * This could in theory increase the threshold under high load,
130 * but doesn't for now.
131 */
132static void intel_threshold_interrupt(void)
133{
134 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
135 mce_notify_user();
136}
137
138static void print_update(char *type, int *hdr, int num)
139{
140 if (*hdr == 0)
141 printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
142 *hdr = 1;
143 printk(KERN_CONT " %s:%d", type, num);
144}
145
146/*
147 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
148 * on this CPU. Use the algorithm recommended in the SDM to discover shared
149 * banks.
150 */
151static void cmci_discover(int banks, int boot)
152{
153 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
154 int hdr = 0;
155 int i;
156
157 spin_lock(&cmci_discover_lock);
158 for (i = 0; i < banks; i++) {
159 u64 val;
160
161 if (test_bit(i, owned))
162 continue;
163
164 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
165
166 /* Already owned by someone else? */
167 if (val & CMCI_EN) {
168 if (test_and_clear_bit(i, owned) || boot)
169 print_update("SHD", &hdr, i);
170 __clear_bit(i, __get_cpu_var(mce_poll_banks));
171 continue;
172 }
173
174 val |= CMCI_EN | CMCI_THRESHOLD;
175 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
176 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
177
178 /* Did the enable bit stick? -- the bank supports CMCI */
179 if (val & CMCI_EN) {
180 if (!test_and_set_bit(i, owned) || boot)
181 print_update("CMCI", &hdr, i);
182 __clear_bit(i, __get_cpu_var(mce_poll_banks));
183 } else {
184 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
185 }
186 }
187 spin_unlock(&cmci_discover_lock);
188 if (hdr)
189 printk(KERN_CONT "\n");
190}
191
192/*
193 * Just in case we missed an event during initialization check
194 * all the CMCI owned banks.
195 */
196void cmci_recheck(void)
197{
198 unsigned long flags;
199 int banks;
200
201 if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
202 return;
203 local_irq_save(flags);
204 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
205 local_irq_restore(flags);
206}
207
208/*
209 * Disable CMCI on this CPU for all banks it owns when it goes down.
210 * This allows other CPUs to claim the banks on rediscovery.
211 */
212void cmci_clear(void)
213{
214 int i;
215 int banks;
216 u64 val;
217
218 if (!cmci_supported(&banks))
219 return;
220 spin_lock(&cmci_discover_lock);
221 for (i = 0; i < banks; i++) {
222 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
223 continue;
224 /* Disable CMCI */
225 rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
226 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
227 wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
228 __clear_bit(i, __get_cpu_var(mce_banks_owned));
229 }
230 spin_unlock(&cmci_discover_lock);
231}
232
233/*
234 * After a CPU went down cycle through all the others and rediscover
235 * Must run in process context.
236 */
237void cmci_rediscover(int dying)
238{
239 int banks;
240 int cpu;
241 cpumask_var_t old;
242
243 if (!cmci_supported(&banks))
244 return;
245 if (!alloc_cpumask_var(&old, GFP_KERNEL))
246 return;
247 cpumask_copy(old, &current->cpus_allowed);
248
249 for_each_online_cpu (cpu) {
250 if (cpu == dying)
251 continue;
252 if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)))
253 continue;
254 /* Recheck banks in case CPUs don't all have the same */
255 if (cmci_supported(&banks))
256 cmci_discover(banks, 0);
257 }
258
259 set_cpus_allowed_ptr(current, old);
260 free_cpumask_var(old);
261}
262
263/*
264 * Reenable CMCI on this CPU in case a CPU down failed.
265 */
266void cmci_reenable(void)
267{
268 int banks;
269 if (cmci_supported(&banks))
270 cmci_discover(banks, 0);
271}
272
273static __cpuinit void intel_init_cmci(void)
274{
275 int banks;
276
277 if (!cmci_supported(&banks))
278 return;
279
280 mce_threshold_vector = intel_threshold_interrupt;
281 cmci_discover(banks, 1);
282 /*
283 * For CPU #0 this runs with still disabled APIC, but that's
284 * ok because only the vector is set up. We still do another
285 * check for the banks later for CPU #0 just to make sure
286 * to not miss any events.
287 */
288 apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
289 cmci_recheck();
290}
291
87void mce_intel_feature_init(struct cpuinfo_x86 *c) 292void mce_intel_feature_init(struct cpuinfo_x86 *c)
88{ 293{
89 intel_init_thermal(c); 294 intel_init_thermal(c);
295 intel_init_cmci();
90} 296}
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 9b60fce09f7..f53bdcbaf38 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -85,7 +85,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
85 */ 85 */
86 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 86 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
87 h = apic_read(APIC_LVTTHMR); 87 h = apic_read(APIC_LVTTHMR);
88 if ((l & (1<<3)) && (h & APIC_DM_SMI)) { 88 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", 89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
90 cpu); 90 cpu);
91 return; /* -EBUSY */ 91 return; /* -EBUSY */
@@ -111,7 +111,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
111 vendor_thermal_interrupt = intel_thermal_interrupt; 111 vendor_thermal_interrupt = intel_thermal_interrupt;
112 112
113 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 113 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
114 wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); 114 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
115 115
116 l = apic_read(APIC_LVTTHMR); 116 l = apic_read(APIC_LVTTHMR);
117 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 117 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
new file mode 100644
index 00000000000..23ee9e730f7
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -0,0 +1,29 @@
1/*
2 * Common corrected MCE threshold handler code:
3 */
4#include <linux/interrupt.h>
5#include <linux/kernel.h>
6
7#include <asm/irq_vectors.h>
8#include <asm/apic.h>
9#include <asm/idle.h>
10#include <asm/mce.h>
11
12static void default_threshold_interrupt(void)
13{
14 printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
15 THRESHOLD_APIC_VECTOR);
16}
17
18void (*mce_threshold_vector)(void) = default_threshold_interrupt;
19
20asmlinkage void mce_threshold_interrupt(void)
21{
22 exit_idle();
23 irq_enter();
24 inc_irq_stat(irq_threshold_count);
25 mce_threshold_vector();
26 irq_exit();
27 /* Ack only at the end to avoid potential reentry */
28 ack_APIC_irq();
29}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 9abd48b2267..f6c70a164e3 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -19,7 +19,7 @@
19#include <linux/nmi.h> 19#include <linux/nmi.h>
20#include <linux/kprobes.h> 20#include <linux/kprobes.h>
21 21
22#include <asm/apic.h> 22#include <asm/genapic.h>
23#include <asm/intel_arch_perfmon.h> 23#include <asm/intel_arch_perfmon.h>
24 24
25struct nmi_watchdog_ctlblk { 25struct nmi_watchdog_ctlblk {
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 01b1244ef1c..d67e0e48bc2 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -7,11 +7,10 @@
7/* 7/*
8 * Get CPU information for use by the procfs. 8 * Get CPU information for use by the procfs.
9 */ 9 */
10#ifdef CONFIG_X86_32
11static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, 10static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
12 unsigned int cpu) 11 unsigned int cpu)
13{ 12{
14#ifdef CONFIG_X86_HT 13#ifdef CONFIG_SMP
15 if (c->x86_max_cores * smp_num_siblings > 1) { 14 if (c->x86_max_cores * smp_num_siblings > 1) {
16 seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); 15 seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
17 seq_printf(m, "siblings\t: %d\n", 16 seq_printf(m, "siblings\t: %d\n",
@@ -24,6 +23,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
24#endif 23#endif
25} 24}
26 25
26#ifdef CONFIG_X86_32
27static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) 27static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
28{ 28{
29 /* 29 /*
@@ -50,22 +50,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
50 c->wp_works_ok ? "yes" : "no"); 50 c->wp_works_ok ? "yes" : "no");
51} 51}
52#else 52#else
53static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
54 unsigned int cpu)
55{
56#ifdef CONFIG_SMP
57 if (c->x86_max_cores * smp_num_siblings > 1) {
58 seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
59 seq_printf(m, "siblings\t: %d\n",
60 cpus_weight(per_cpu(cpu_core_map, cpu)));
61 seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
62 seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
63 seq_printf(m, "apicid\t\t: %d\n", c->apicid);
64 seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
65 }
66#endif
67}
68
69static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) 53static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
70{ 54{
71 seq_printf(m, 55 seq_printf(m,
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 52b3fefbd5a..bb62b3e5caa 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -98,7 +98,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
98#endif 98#endif
99} 99}
100 100
101static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { 101static const struct cpu_dev __cpuinitconst transmeta_cpu_dev = {
102 .c_vendor = "Transmeta", 102 .c_vendor = "Transmeta",
103 .c_ident = { "GenuineTMx86", "TransmetaCPU" }, 103 .c_ident = { "GenuineTMx86", "TransmetaCPU" },
104 .c_early_init = early_init_transmeta, 104 .c_early_init = early_init_transmeta,
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index e777f79e096..fd2c37bf7ac 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -8,7 +8,7 @@
8 * so no special init takes place. 8 * so no special init takes place.
9 */ 9 */
10 10
11static struct cpu_dev umc_cpu_dev __cpuinitdata = { 11static const struct cpu_dev __cpuinitconst umc_cpu_dev = {
12 .c_vendor = "UMC", 12 .c_vendor = "UMC",
13 .c_ident = { "UMC UMC UMC" }, 13 .c_ident = { "UMC UMC UMC" },
14 .c_models = { 14 .c_models = {