Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--  arch/x86/kernel/cpu/Makefile                 |   2
-rw-r--r--  arch/x86/kernel/cpu/amd.c                    |  10
-rw-r--r--  arch/x86/kernel/cpu/cpu_debug.c              |   4
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c   |  88
-rw-r--r--  arch/x86/kernel/cpu/intel.c                  |   6
-rw-r--r--  arch/x86/kernel/cpu/mcheck/Makefile          |   5
-rw-r--r--  arch/x86/kernel/cpu/mcheck/k7.c              | 116
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c      | 158
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-internal.h    |  15
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c    |   8
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c             | 312
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c         |   2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c       |  10
-rw-r--r--  arch/x86/kernel/cpu/mcheck/non-fatal.c       |  94
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p4.c              | 163
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p6.c              | 127
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c     |  13
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c              |  46
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c           |  14
-rw-r--r--  arch/x86/kernel/cpu/sched.c                  |  55
20 files changed, 456 insertions, 792 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index c1f253dac155..8dd30638fe44 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp)
 
 obj-y := intel_cacheinfo.o addon_cpuid_features.o
 obj-y += proc.o capflags.o powerflags.o common.o
-obj-y += vmware.o hypervisor.o
+obj-y += vmware.o hypervisor.o sched.o
 
 obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64) += bugs_64.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 22a47c82f3c0..f32fa71ccf97 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -333,6 +333,16 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif
 }
 
+int amd_get_nb_id(int cpu)
+{
+	int id = 0;
+#ifdef CONFIG_SMP
+	id = per_cpu(cpu_llc_id, cpu);
+#endif
+	return id;
+}
+EXPORT_SYMBOL_GPL(amd_get_nb_id);
+
 static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 {
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
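
The amd.c hunk above adds and exports a small accessor, amd_get_nb_id(), so that code outside this file (the AMD northbridge/EDAC paths are natural consumers) can map a CPU to its northbridge id instead of reading cpu_llc_id directly. A minimal sketch of a hypothetical caller, using only the interface added here (the declaration lives in a header that is not part of this diff):

	/* Hypothetical consumer of the new helper -- not part of this patch. */
	static void example_report_nb(int cpu)
	{
		int nb_id = amd_get_nb_id(cpu);	/* cpu_llc_id on SMP, 0 otherwise */

		printk(KERN_INFO "CPU %d sits behind northbridge %d\n", cpu, nb_id);
	}
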
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index 6b2a52dd0403..dca325c03999 100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -30,8 +30,8 @@
 #include <asm/apic.h>
 #include <asm/desc.h>
 
-static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
-static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
+static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr);
+static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr);
 static DEFINE_PER_CPU(int, cpu_priv_count);
 
 static DEFINE_MUTEX(cpu_debug_lock);
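
The cpu_debug.c change above (and the matching threshold_banks change in mce_amd.c further down) moves the array dimension of a per-CPU array into the type argument of DEFINE_PER_CPU() rather than the name, as the newer per-CPU infrastructure requires. Access is unchanged; a minimal sketch with made-up names:

	/* Illustrative only -- example_arr/NEXAMPLE are placeholders. */
	static DEFINE_PER_CPU(int [NEXAMPLE], example_arr);	/* "int [NEXAMPLE]" is the type */

	static void example_touch(int cpu)
	{
		per_cpu(example_arr, cpu)[0] = 1;	/* whole array for that CPU */
		__get_cpu_var(example_arr)[1] = 2;	/* this CPU's copy */
	}
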
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index ae9b503220ca..4109679863c1 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -60,7 +60,6 @@ enum {
 };
 
 #define INTEL_MSR_RANGE (0xffff)
-#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1)
 
 struct acpi_cpufreq_data {
 	struct acpi_processor_performance *acpi_data;
@@ -71,11 +70,7 @@ struct acpi_cpufreq_data {
 
 static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
 
-struct acpi_msr_data {
-	u64 saved_aperf, saved_mperf;
-};
-
-static DEFINE_PER_CPU(struct acpi_msr_data, msr_data);
+static DEFINE_PER_CPU(struct aperfmperf, old_perf);
 
 DEFINE_TRACE(power_mark);
 
@@ -244,23 +239,12 @@ static u32 get_cur_val(const struct cpumask *mask)
 	return cmd.val;
 }
 
-struct perf_pair {
-	union {
-		struct {
-			u32 lo;
-			u32 hi;
-		} split;
-		u64 whole;
-	} aperf, mperf;
-};
-
 /* Called via smp_call_function_single(), on the target CPU */
 static void read_measured_perf_ctrs(void *_cur)
 {
-	struct perf_pair *cur = _cur;
+	struct aperfmperf *am = _cur;
 
-	rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
-	rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
+	get_aperfmperf(am);
 }
 
 /*
@@ -279,63 +263,17 @@ static void read_measured_perf_ctrs(void *_cur)
 static unsigned int get_measured_perf(struct cpufreq_policy *policy,
 				      unsigned int cpu)
 {
-	struct perf_pair readin, cur;
-	unsigned int perf_percent;
+	struct aperfmperf perf;
+	unsigned long ratio;
 	unsigned int retval;
 
-	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1))
+	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
 		return 0;
 
-	cur.aperf.whole = readin.aperf.whole -
-		per_cpu(msr_data, cpu).saved_aperf;
-	cur.mperf.whole = readin.mperf.whole -
-		per_cpu(msr_data, cpu).saved_mperf;
-	per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole;
-	per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole;
-
-#ifdef __i386__
-	/*
-	 * We dont want to do 64 bit divide with 32 bit kernel
-	 * Get an approximate value. Return failure in case we cannot get
-	 * an approximate value.
-	 */
-	if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
-		int shift_count;
-		u32 h;
-
-		h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
-		shift_count = fls(h);
-
-		cur.aperf.whole >>= shift_count;
-		cur.mperf.whole >>= shift_count;
-	}
-
-	if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
-		int shift_count = 7;
-		cur.aperf.split.lo >>= shift_count;
-		cur.mperf.split.lo >>= shift_count;
-	}
-
-	if (cur.aperf.split.lo && cur.mperf.split.lo)
-		perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
-	else
-		perf_percent = 0;
+	ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf);
+	per_cpu(old_perf, cpu) = perf;
 
-#else
-	if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
-		int shift_count = 7;
-		cur.aperf.whole >>= shift_count;
-		cur.mperf.whole >>= shift_count;
-	}
-
-	if (cur.aperf.whole && cur.mperf.whole)
-		perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
-	else
-		perf_percent = 0;
-
-#endif
-
-	retval = (policy->cpuinfo.max_freq * perf_percent) / 100;
+	retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
 
 	return retval;
 }
@@ -731,12 +669,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	acpi_processor_notify_smm(THIS_MODULE);
 
 	/* Check for APERF/MPERF support in hardware */
-	if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) {
-		unsigned int ecx;
-		ecx = cpuid_ecx(6);
-		if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
-			acpi_cpufreq_driver.getavg = get_measured_perf;
-	}
+	if (cpu_has(c, X86_FEATURE_APERFMPERF))
+		acpi_cpufreq_driver.getavg = get_measured_perf;
 
 	dprintk("CPU%u - ACPI performance management activated.\n", cpu);
 	for (i = 0; i < perf->state_count; i++)
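
The acpi-cpufreq changes above lean on helpers introduced elsewhere in this series and not shown in this diff: struct aperfmperf, get_aperfmperf(), calc_aperfmperf_ratio() and APERFMPERF_SHIFT. They replace the open-coded percentage math deleted here with a fixed-point ratio of the APERF/MPERF deltas, scaled against the maximum frequency. A rough sketch of the intended computation, assuming a 10-bit fixed-point shift (the real helper's overflow handling is omitted):

	/* Sketch of the fixed-point ratio idea; not the kernel's exact helper. */
	#define EXAMPLE_SHIFT	10

	static unsigned long example_ratio(u64 aperf_delta, u64 mperf_delta)
	{
		if (!mperf_delta)
			return 0;
		return div64_u64(aperf_delta << EXAMPLE_SHIFT, mperf_delta);
	}

	/* estimated freq = (policy->cpuinfo.max_freq * ratio) >> EXAMPLE_SHIFT,
	 * exactly as get_measured_perf() now does above. */
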
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 80a722a071b5..40e1835b35e8 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -350,6 +350,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
 	}
 
+	if (c->cpuid_level > 6) {
+		unsigned ecx = cpuid_ecx(6);
+		if (ecx & 0x01)
+			set_cpu_cap(c, X86_FEATURE_APERFMPERF);
+	}
+
 	if (cpu_has_xmm2)
 		set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 	if (cpu_has_ds) {
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 188a1ca5ad2b..4ac6d48fe11b 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,11 +1,8 @@
-obj-y = mce.o
+obj-y = mce.o mce-severity.o
 
-obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
-obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
 obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
 obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
-obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
 obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
 obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
 
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
deleted file mode 100644
index b945d5dbc609..000000000000
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ /dev/null
@@ -1,116 +0,0 @@
1/*
2 * Athlon specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Dave Jones <davej@redhat.com>
4 */
5#include <linux/interrupt.h>
6#include <linux/kernel.h>
7#include <linux/types.h>
8#include <linux/init.h>
9#include <linux/smp.h>
10
11#include <asm/processor.h>
12#include <asm/system.h>
13#include <asm/mce.h>
14#include <asm/msr.h>
15
16/* Machine Check Handler For AMD Athlon/Duron: */
17static void k7_machine_check(struct pt_regs *regs, long error_code)
18{
19 u32 alow, ahigh, high, low;
20 u32 mcgstl, mcgsth;
21 int recover = 1;
22 int i;
23
24 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
25 if (mcgstl & (1<<0)) /* Recoverable ? */
26 recover = 0;
27
28 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
29 smp_processor_id(), mcgsth, mcgstl);
30
31 for (i = 1; i < nr_mce_banks; i++) {
32 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
33 if (high & (1<<31)) {
34 char misc[20];
35 char addr[24];
36
37 misc[0] = '\0';
38 addr[0] = '\0';
39
40 if (high & (1<<29))
41 recover |= 1;
42 if (high & (1<<25))
43 recover |= 2;
44 high &= ~(1<<31);
45
46 if (high & (1<<27)) {
47 rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
48 snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
49 }
50 if (high & (1<<26)) {
51 rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
52 snprintf(addr, 24, " at %08x%08x", ahigh, alow);
53 }
54
55 printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
56 smp_processor_id(), i, high, low, misc, addr);
57
58 /* Clear it: */
59 wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
60 /* Serialize: */
61 wmb();
62 add_taint(TAINT_MACHINE_CHECK);
63 }
64 }
65
66 if (recover & 2)
67 panic("CPU context corrupt");
68 if (recover & 1)
69 panic("Unable to continue");
70
71 printk(KERN_EMERG "Attempting to continue.\n");
72
73 mcgstl &= ~(1<<2);
74 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
75}
76
77
78/* AMD K7 machine check is Intel like: */
79void amd_mcheck_init(struct cpuinfo_x86 *c)
80{
81 u32 l, h;
82 int i;
83
84 if (!cpu_has(c, X86_FEATURE_MCE))
85 return;
86
87 machine_check_vector = k7_machine_check;
88 /* Make sure the vector pointer is visible before we enable MCEs: */
89 wmb();
90
91 printk(KERN_INFO "Intel machine check architecture supported.\n");
92
93 rdmsr(MSR_IA32_MCG_CAP, l, h);
94 if (l & (1<<8)) /* Control register present ? */
95 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
96 nr_mce_banks = l & 0xff;
97
98 /*
99 * Clear status for MC index 0 separately, we don't touch CTL,
100 * as some K7 Athlons cause spurious MCEs when its enabled:
101 */
102 if (boot_cpu_data.x86 == 6) {
103 wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
104 i = 1;
105 } else
106 i = 0;
107
108 for (; i < nr_mce_banks; i++) {
109 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
110 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
111 }
112
113 set_in_cr4(X86_CR4_MCE);
114 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
115 smp_processor_id());
116}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index a3a235a53f09..7029f0e2acad 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -18,7 +18,12 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/smp.h>
+#include <linux/notifier.h>
+#include <linux/kdebug.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
 #include <asm/mce.h>
+#include <asm/apic.h>
 
 /* Update fake mce registers on current CPU. */
 static void inject_mce(struct mce *m)
@@ -39,44 +44,141 @@ static void inject_mce(struct mce *m)
 	i->finished = 1;
 }
 
-struct delayed_mce {
-	struct timer_list timer;
-	struct mce m;
-};
+static void raise_poll(struct mce *m)
+{
+	unsigned long flags;
+	mce_banks_t b;
 
-/* Inject mce on current CPU */
-static void raise_mce(unsigned long data)
+	memset(&b, 0xff, sizeof(mce_banks_t));
+	local_irq_save(flags);
+	machine_check_poll(0, &b);
+	local_irq_restore(flags);
+	m->finished = 0;
+}
+
+static void raise_exception(struct mce *m, struct pt_regs *pregs)
 {
-	struct delayed_mce *dm = (struct delayed_mce *)data;
-	struct mce *m = &dm->m;
-	int cpu = m->extcpu;
+	struct pt_regs regs;
+	unsigned long flags;
 
-	inject_mce(m);
-	if (m->status & MCI_STATUS_UC) {
-		struct pt_regs regs;
+	if (!pregs) {
 		memset(&regs, 0, sizeof(struct pt_regs));
 		regs.ip = m->ip;
 		regs.cs = m->cs;
+		pregs = &regs;
+	}
+	/* in mcheck exeception handler, irq will be disabled */
+	local_irq_save(flags);
+	do_machine_check(pregs, 0);
+	local_irq_restore(flags);
+	m->finished = 0;
+}
+
+static cpumask_t mce_inject_cpumask;
+
+static int mce_raise_notify(struct notifier_block *self,
+			    unsigned long val, void *data)
+{
+	struct die_args *args = (struct die_args *)data;
+	int cpu = smp_processor_id();
+	struct mce *m = &__get_cpu_var(injectm);
+	if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
+		return NOTIFY_DONE;
+	cpu_clear(cpu, mce_inject_cpumask);
+	if (m->inject_flags & MCJ_EXCEPTION)
+		raise_exception(m, args->regs);
+	else if (m->status)
+		raise_poll(m);
+	return NOTIFY_STOP;
+}
+
+static struct notifier_block mce_raise_nb = {
+	.notifier_call = mce_raise_notify,
+	.priority = 1000,
+};
+
+/* Inject mce on current CPU */
+static int raise_local(struct mce *m)
+{
+	int context = MCJ_CTX(m->inject_flags);
+	int ret = 0;
+	int cpu = m->extcpu;
+
+	if (m->inject_flags & MCJ_EXCEPTION) {
 		printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
-		do_machine_check(&regs, 0);
+		switch (context) {
+		case MCJ_CTX_IRQ:
+			/*
+			 * Could do more to fake interrupts like
+			 * calling irq_enter, but the necessary
+			 * machinery isn't exported currently.
+			 */
+			/*FALL THROUGH*/
+		case MCJ_CTX_PROCESS:
+			raise_exception(m, NULL);
+			break;
+		default:
+			printk(KERN_INFO "Invalid MCE context\n");
+			ret = -EINVAL;
+		}
 		printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
-	} else {
-		mce_banks_t b;
-		memset(&b, 0xff, sizeof(mce_banks_t));
+	} else if (m->status) {
 		printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
-		machine_check_poll(0, &b);
+		raise_poll(m);
 		mce_notify_irq();
-		printk(KERN_INFO "Finished machine check poll on CPU %d\n",
-			cpu);
-	}
-	kfree(dm);
+		printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
+	} else
+		m->finished = 0;
+
+	return ret;
+}
+
+static void raise_mce(struct mce *m)
+{
+	int context = MCJ_CTX(m->inject_flags);
+
+	inject_mce(m);
+
+	if (context == MCJ_CTX_RANDOM)
+		return;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	if (m->inject_flags & MCJ_NMI_BROADCAST) {
+		unsigned long start;
+		int cpu;
+		get_online_cpus();
+		mce_inject_cpumask = cpu_online_map;
+		cpu_clear(get_cpu(), mce_inject_cpumask);
+		for_each_online_cpu(cpu) {
+			struct mce *mcpu = &per_cpu(injectm, cpu);
+			if (!mcpu->finished ||
+			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
+				cpu_clear(cpu, mce_inject_cpumask);
+		}
+		if (!cpus_empty(mce_inject_cpumask))
+			apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR);
+		start = jiffies;
+		while (!cpus_empty(mce_inject_cpumask)) {
+			if (!time_before(jiffies, start + 2*HZ)) {
+				printk(KERN_ERR
+				"Timeout waiting for mce inject NMI %lx\n",
+					*cpus_addr(mce_inject_cpumask));
+				break;
+			}
+			cpu_relax();
+		}
+		raise_local(m);
+		put_cpu();
+		put_online_cpus();
+	} else
+#endif
+		raise_local(m);
 }
 
 /* Error injection interface */
 static ssize_t mce_write(struct file *filp, const char __user *ubuf,
 			 size_t usize, loff_t *off)
 {
-	struct delayed_mce *dm;
 	struct mce m;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -96,19 +198,12 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
 	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
 		return -EINVAL;
 
-	dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
-	if (!dm)
-		return -ENOMEM;
-
 	/*
 	 * Need to give user space some time to set everything up,
 	 * so do it a jiffie or two later everywhere.
-	 * Should we use a hrtimer here for better synchronization?
 	 */
-	memcpy(&dm->m, &m, sizeof(struct mce));
-	setup_timer(&dm->timer, raise_mce, (unsigned long)dm);
-	dm->timer.expires = jiffies + 2;
-	add_timer_on(&dm->timer, m.extcpu);
+	schedule_timeout(2);
+	raise_mce(&m);
 	return usize;
 }
 
@@ -116,6 +211,7 @@ static int inject_init(void)
 {
 	printk(KERN_INFO "Machine check injector initialized\n");
 	mce_chrdev_ops.write = mce_write;
+	register_die_notifier(&mce_raise_nb);
 	return 0;
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 54dcb8ff12e5..32996f9fab67 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -1,3 +1,4 @@
+#include <linux/sysdev.h>
 #include <asm/mce.h>
 
 enum severity_level {
@@ -10,6 +11,20 @@ enum severity_level {
 	MCE_PANIC_SEVERITY,
 };
 
+#define ATTR_LEN		16
+
+/* One object for each MCE bank, shared by all CPUs */
+struct mce_bank {
+	u64 ctl;			/* subevents to enable */
+	unsigned char init;		/* initialise bank? */
+	struct sysdev_attribute attr;	/* sysdev attribute */
+	char attrname[ATTR_LEN];	/* attribute name */
+};
+
 int mce_severity(struct mce *a, int tolerant, char **msg);
+struct dentry *mce_get_debugfs_dir(void);
 
 extern int mce_ser;
+
+extern struct mce_bank *mce_banks;
+
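
Embedding the sysdev_attribute inside the new struct mce_bank is what lets the per-bank sysfs callbacks drop the old bank_attrs[] index arithmetic: given the attribute pointer the sysdev core hands in, container_of() recovers the owning bank (see attr_to_bank() in the mce.c hunks below). A self-contained sketch of the same pattern with placeholder names:

	/* Illustrative pattern only; example_bank/example_show are made up. */
	struct example_bank {
		u64 ctl;
		struct sysdev_attribute attr;	/* registered with sysfs */
	};

	static ssize_t example_show(struct sys_device *s,
				    struct sysdev_attribute *attr, char *buf)
	{
		struct example_bank *b = container_of(attr, struct example_bank, attr);

		return sprintf(buf, "%llx\n", b->ctl);
	}
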
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index ff0807f97056..8a85dd1b1aa1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -139,6 +139,7 @@ int mce_severity(struct mce *a, int tolerant, char **msg)
 	}
 }
 
+#ifdef CONFIG_DEBUG_FS
 static void *s_start(struct seq_file *f, loff_t *pos)
 {
 	if (*pos >= ARRAY_SIZE(severities))
@@ -197,7 +198,7 @@ static int __init severities_debugfs_init(void)
 {
 	struct dentry *dmce = NULL, *fseverities_coverage = NULL;
 
-	dmce = debugfs_create_dir("mce", NULL);
+	dmce = mce_get_debugfs_dir();
 	if (dmce == NULL)
 		goto err_out;
 	fseverities_coverage = debugfs_create_file("severities-coverage",
@@ -209,10 +210,7 @@ static int __init severities_debugfs_init(void)
 	return 0;
 
 err_out:
-	if (fseverities_coverage)
-		debugfs_remove(fseverities_coverage);
-	if (dmce)
-		debugfs_remove(dmce);
 	return -ENOMEM;
 }
 late_initcall(severities_debugfs_init);
+#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9bfe9d2ea615..2f5aab26320e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -34,6 +34,7 @@
34#include <linux/smp.h> 34#include <linux/smp.h>
35#include <linux/fs.h> 35#include <linux/fs.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37#include <linux/debugfs.h>
37 38
38#include <asm/processor.h> 39#include <asm/processor.h>
39#include <asm/hw_irq.h> 40#include <asm/hw_irq.h>
@@ -45,21 +46,8 @@
45 46
46#include "mce-internal.h" 47#include "mce-internal.h"
47 48
48/* Handle unconfigured int18 (should never happen) */
49static void unexpected_machine_check(struct pt_regs *regs, long error_code)
50{
51 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
52 smp_processor_id());
53}
54
55/* Call the installed machine check handler for this CPU setup. */
56void (*machine_check_vector)(struct pt_regs *, long error_code) =
57 unexpected_machine_check;
58
59int mce_disabled __read_mostly; 49int mce_disabled __read_mostly;
60 50
61#ifdef CONFIG_X86_NEW_MCE
62
63#define MISC_MCELOG_MINOR 227 51#define MISC_MCELOG_MINOR 227
64 52
65#define SPINUNIT 100 /* 100ns */ 53#define SPINUNIT 100 /* 100ns */
@@ -77,7 +65,6 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
77 */ 65 */
78static int tolerant __read_mostly = 1; 66static int tolerant __read_mostly = 1;
79static int banks __read_mostly; 67static int banks __read_mostly;
80static u64 *bank __read_mostly;
81static int rip_msr __read_mostly; 68static int rip_msr __read_mostly;
82static int mce_bootlog __read_mostly = -1; 69static int mce_bootlog __read_mostly = -1;
83static int monarch_timeout __read_mostly = -1; 70static int monarch_timeout __read_mostly = -1;
@@ -87,13 +74,13 @@ int mce_cmci_disabled __read_mostly;
87int mce_ignore_ce __read_mostly; 74int mce_ignore_ce __read_mostly;
88int mce_ser __read_mostly; 75int mce_ser __read_mostly;
89 76
77struct mce_bank *mce_banks __read_mostly;
78
90/* User mode helper program triggered by machine check event */ 79/* User mode helper program triggered by machine check event */
91static unsigned long mce_need_notify; 80static unsigned long mce_need_notify;
92static char mce_helper[128]; 81static char mce_helper[128];
93static char *mce_helper_argv[2] = { mce_helper, NULL }; 82static char *mce_helper_argv[2] = { mce_helper, NULL };
94 83
95static unsigned long dont_init_banks;
96
97static DECLARE_WAIT_QUEUE_HEAD(mce_wait); 84static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
98static DEFINE_PER_CPU(struct mce, mces_seen); 85static DEFINE_PER_CPU(struct mce, mces_seen);
99static int cpu_missing; 86static int cpu_missing;
@@ -104,11 +91,6 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
104 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 91 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
105}; 92};
106 93
107static inline int skip_bank_init(int i)
108{
109 return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
110}
111
112static DEFINE_PER_CPU(struct work_struct, mce_work); 94static DEFINE_PER_CPU(struct work_struct, mce_work);
113 95
114/* Do initial initialization of a struct mce */ 96/* Do initial initialization of a struct mce */
@@ -232,6 +214,9 @@ static void print_mce_tail(void)
232 214
233static atomic_t mce_paniced; 215static atomic_t mce_paniced;
234 216
217static int fake_panic;
218static atomic_t mce_fake_paniced;
219
235/* Panic in progress. Enable interrupts and wait for final IPI */ 220/* Panic in progress. Enable interrupts and wait for final IPI */
236static void wait_for_panic(void) 221static void wait_for_panic(void)
237{ 222{
@@ -249,15 +234,21 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
249{ 234{
250 int i; 235 int i;
251 236
252 /* 237 if (!fake_panic) {
253 * Make sure only one CPU runs in machine check panic 238 /*
254 */ 239 * Make sure only one CPU runs in machine check panic
255 if (atomic_add_return(1, &mce_paniced) > 1) 240 */
256 wait_for_panic(); 241 if (atomic_inc_return(&mce_paniced) > 1)
257 barrier(); 242 wait_for_panic();
243 barrier();
258 244
259 bust_spinlocks(1); 245 bust_spinlocks(1);
260 console_verbose(); 246 console_verbose();
247 } else {
248 /* Don't log too much for fake panic */
249 if (atomic_inc_return(&mce_fake_paniced) > 1)
250 return;
251 }
261 print_mce_head(); 252 print_mce_head();
262 /* First print corrected ones that are still unlogged */ 253 /* First print corrected ones that are still unlogged */
263 for (i = 0; i < MCE_LOG_LEN; i++) { 254 for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -284,9 +275,12 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
284 print_mce_tail(); 275 print_mce_tail();
285 if (exp) 276 if (exp)
286 printk(KERN_EMERG "Machine check: %s\n", exp); 277 printk(KERN_EMERG "Machine check: %s\n", exp);
287 if (panic_timeout == 0) 278 if (!fake_panic) {
288 panic_timeout = mce_panic_timeout; 279 if (panic_timeout == 0)
289 panic(msg); 280 panic_timeout = mce_panic_timeout;
281 panic(msg);
282 } else
283 printk(KERN_EMERG "Fake kernel panic: %s\n", msg);
290} 284}
291 285
292/* Support code for software error injection */ 286/* Support code for software error injection */
@@ -296,11 +290,11 @@ static int msr_to_offset(u32 msr)
296 unsigned bank = __get_cpu_var(injectm.bank); 290 unsigned bank = __get_cpu_var(injectm.bank);
297 if (msr == rip_msr) 291 if (msr == rip_msr)
298 return offsetof(struct mce, ip); 292 return offsetof(struct mce, ip);
299 if (msr == MSR_IA32_MC0_STATUS + bank*4) 293 if (msr == MSR_IA32_MCx_STATUS(bank))
300 return offsetof(struct mce, status); 294 return offsetof(struct mce, status);
301 if (msr == MSR_IA32_MC0_ADDR + bank*4) 295 if (msr == MSR_IA32_MCx_ADDR(bank))
302 return offsetof(struct mce, addr); 296 return offsetof(struct mce, addr);
303 if (msr == MSR_IA32_MC0_MISC + bank*4) 297 if (msr == MSR_IA32_MCx_MISC(bank))
304 return offsetof(struct mce, misc); 298 return offsetof(struct mce, misc);
305 if (msr == MSR_IA32_MCG_STATUS) 299 if (msr == MSR_IA32_MCG_STATUS)
306 return offsetof(struct mce, mcgstatus); 300 return offsetof(struct mce, mcgstatus);
@@ -505,7 +499,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
505 499
506 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 500 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
507 for (i = 0; i < banks; i++) { 501 for (i = 0; i < banks; i++) {
508 if (!bank[i] || !test_bit(i, *b)) 502 if (!mce_banks[i].ctl || !test_bit(i, *b))
509 continue; 503 continue;
510 504
511 m.misc = 0; 505 m.misc = 0;
@@ -514,7 +508,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
514 m.tsc = 0; 508 m.tsc = 0;
515 509
516 barrier(); 510 barrier();
517 m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); 511 m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
518 if (!(m.status & MCI_STATUS_VAL)) 512 if (!(m.status & MCI_STATUS_VAL))
519 continue; 513 continue;
520 514
@@ -529,9 +523,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
529 continue; 523 continue;
530 524
531 if (m.status & MCI_STATUS_MISCV) 525 if (m.status & MCI_STATUS_MISCV)
532 m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); 526 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
533 if (m.status & MCI_STATUS_ADDRV) 527 if (m.status & MCI_STATUS_ADDRV)
534 m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); 528 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
535 529
536 if (!(flags & MCP_TIMESTAMP)) 530 if (!(flags & MCP_TIMESTAMP))
537 m.tsc = 0; 531 m.tsc = 0;
@@ -547,7 +541,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
547 /* 541 /*
548 * Clear state for this bank. 542 * Clear state for this bank.
549 */ 543 */
550 mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 544 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
551 } 545 }
552 546
553 /* 547 /*
@@ -568,7 +562,7 @@ static int mce_no_way_out(struct mce *m, char **msg)
568 int i; 562 int i;
569 563
570 for (i = 0; i < banks; i++) { 564 for (i = 0; i < banks; i++) {
571 m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); 565 m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
572 if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) 566 if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
573 return 1; 567 return 1;
574 } 568 }
@@ -628,7 +622,7 @@ out:
628 * This way we prevent any potential data corruption in a unrecoverable case 622 * This way we prevent any potential data corruption in a unrecoverable case
629 * and also makes sure always all CPU's errors are examined. 623 * and also makes sure always all CPU's errors are examined.
630 * 624 *
631 * Also this detects the case of an machine check event coming from outer 625 * Also this detects the case of a machine check event coming from outer
632 * space (not detected by any CPUs) In this case some external agent wants 626 * space (not detected by any CPUs) In this case some external agent wants
633 * us to shut down, so panic too. 627 * us to shut down, so panic too.
634 * 628 *
@@ -681,7 +675,7 @@ static void mce_reign(void)
681 * No machine check event found. Must be some external 675 * No machine check event found. Must be some external
682 * source or one CPU is hung. Panic. 676 * source or one CPU is hung. Panic.
683 */ 677 */
684 if (!m && tolerant < 3) 678 if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
685 mce_panic("Machine check from unknown source", NULL, NULL); 679 mce_panic("Machine check from unknown source", NULL, NULL);
686 680
687 /* 681 /*
@@ -715,7 +709,7 @@ static int mce_start(int *no_way_out)
715 * global_nwo should be updated before mce_callin 709 * global_nwo should be updated before mce_callin
716 */ 710 */
717 smp_wmb(); 711 smp_wmb();
718 order = atomic_add_return(1, &mce_callin); 712 order = atomic_inc_return(&mce_callin);
719 713
720 /* 714 /*
721 * Wait for everyone. 715 * Wait for everyone.
@@ -852,7 +846,7 @@ static void mce_clear_state(unsigned long *toclear)
852 846
853 for (i = 0; i < banks; i++) { 847 for (i = 0; i < banks; i++) {
854 if (test_bit(i, toclear)) 848 if (test_bit(i, toclear))
855 mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 849 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
856 } 850 }
857} 851}
858 852
@@ -905,11 +899,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
905 mce_setup(&m); 899 mce_setup(&m);
906 900
907 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 901 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
908 no_way_out = mce_no_way_out(&m, &msg);
909
910 final = &__get_cpu_var(mces_seen); 902 final = &__get_cpu_var(mces_seen);
911 *final = m; 903 *final = m;
912 904
905 no_way_out = mce_no_way_out(&m, &msg);
906
913 barrier(); 907 barrier();
914 908
915 /* 909 /*
@@ -926,14 +920,14 @@ void do_machine_check(struct pt_regs *regs, long error_code)
926 order = mce_start(&no_way_out); 920 order = mce_start(&no_way_out);
927 for (i = 0; i < banks; i++) { 921 for (i = 0; i < banks; i++) {
928 __clear_bit(i, toclear); 922 __clear_bit(i, toclear);
929 if (!bank[i]) 923 if (!mce_banks[i].ctl)
930 continue; 924 continue;
931 925
932 m.misc = 0; 926 m.misc = 0;
933 m.addr = 0; 927 m.addr = 0;
934 m.bank = i; 928 m.bank = i;
935 929
936 m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); 930 m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
937 if ((m.status & MCI_STATUS_VAL) == 0) 931 if ((m.status & MCI_STATUS_VAL) == 0)
938 continue; 932 continue;
939 933
@@ -974,9 +968,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
974 kill_it = 1; 968 kill_it = 1;
975 969
976 if (m.status & MCI_STATUS_MISCV) 970 if (m.status & MCI_STATUS_MISCV)
977 m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); 971 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
978 if (m.status & MCI_STATUS_ADDRV) 972 if (m.status & MCI_STATUS_ADDRV)
979 m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); 973 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
980 974
981 /* 975 /*
982 * Action optional error. Queue address for later processing. 976 * Action optional error. Queue address for later processing.
@@ -1101,7 +1095,7 @@ void mce_log_therm_throt_event(__u64 status)
1101 */ 1095 */
1102static int check_interval = 5 * 60; /* 5 minutes */ 1096static int check_interval = 5 * 60; /* 5 minutes */
1103 1097
1104static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ 1098static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
1105static DEFINE_PER_CPU(struct timer_list, mce_timer); 1099static DEFINE_PER_CPU(struct timer_list, mce_timer);
1106 1100
1107static void mcheck_timer(unsigned long data) 1101static void mcheck_timer(unsigned long data)
@@ -1120,7 +1114,7 @@ static void mcheck_timer(unsigned long data)
1120 * Alert userspace if needed. If we logged an MCE, reduce the 1114 * Alert userspace if needed. If we logged an MCE, reduce the
1121 * polling interval, otherwise increase the polling interval. 1115 * polling interval, otherwise increase the polling interval.
1122 */ 1116 */
1123 n = &__get_cpu_var(next_interval); 1117 n = &__get_cpu_var(mce_next_interval);
1124 if (mce_notify_irq()) 1118 if (mce_notify_irq())
1125 *n = max(*n/2, HZ/100); 1119 *n = max(*n/2, HZ/100);
1126 else 1120 else
@@ -1169,10 +1163,25 @@ int mce_notify_irq(void)
1169} 1163}
1170EXPORT_SYMBOL_GPL(mce_notify_irq); 1164EXPORT_SYMBOL_GPL(mce_notify_irq);
1171 1165
1166static int mce_banks_init(void)
1167{
1168 int i;
1169
1170 mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
1171 if (!mce_banks)
1172 return -ENOMEM;
1173 for (i = 0; i < banks; i++) {
1174 struct mce_bank *b = &mce_banks[i];
1175 b->ctl = -1ULL;
1176 b->init = 1;
1177 }
1178 return 0;
1179}
1180
1172/* 1181/*
1173 * Initialize Machine Checks for a CPU. 1182 * Initialize Machine Checks for a CPU.
1174 */ 1183 */
1175static int mce_cap_init(void) 1184static int __cpuinit mce_cap_init(void)
1176{ 1185{
1177 unsigned b; 1186 unsigned b;
1178 u64 cap; 1187 u64 cap;
@@ -1192,11 +1201,10 @@ static int mce_cap_init(void)
1192 /* Don't support asymmetric configurations today */ 1201 /* Don't support asymmetric configurations today */
1193 WARN_ON(banks != 0 && b != banks); 1202 WARN_ON(banks != 0 && b != banks);
1194 banks = b; 1203 banks = b;
1195 if (!bank) { 1204 if (!mce_banks) {
1196 bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); 1205 int err = mce_banks_init();
1197 if (!bank) 1206 if (err)
1198 return -ENOMEM; 1207 return err;
1199 memset(bank, 0xff, banks * sizeof(u64));
1200 } 1208 }
1201 1209
1202 /* Use accurate RIP reporting if available. */ 1210 /* Use accurate RIP reporting if available. */
@@ -1228,15 +1236,16 @@ static void mce_init(void)
1228 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 1236 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
1229 1237
1230 for (i = 0; i < banks; i++) { 1238 for (i = 0; i < banks; i++) {
1231 if (skip_bank_init(i)) 1239 struct mce_bank *b = &mce_banks[i];
1240 if (!b->init)
1232 continue; 1241 continue;
1233 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); 1242 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
1234 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 1243 wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
1235 } 1244 }
1236} 1245}
1237 1246
1238/* Add per CPU specific workarounds here */ 1247/* Add per CPU specific workarounds here */
1239static int mce_cpu_quirks(struct cpuinfo_x86 *c) 1248static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
1240{ 1249{
1241 if (c->x86_vendor == X86_VENDOR_UNKNOWN) { 1250 if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
1242 pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); 1251 pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1251,7 +1260,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
1251 * trips off incorrectly with the IOMMU & 3ware 1260 * trips off incorrectly with the IOMMU & 3ware
1252 * & Cerberus: 1261 * & Cerberus:
1253 */ 1262 */
1254 clear_bit(10, (unsigned long *)&bank[4]); 1263 clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
1255 } 1264 }
1256 if (c->x86 <= 17 && mce_bootlog < 0) { 1265 if (c->x86 <= 17 && mce_bootlog < 0) {
1257 /* 1266 /*
@@ -1265,7 +1274,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
1265 * by default. 1274 * by default.
1266 */ 1275 */
1267 if (c->x86 == 6 && banks > 0) 1276 if (c->x86 == 6 && banks > 0)
1268 bank[0] = 0; 1277 mce_banks[0].ctl = 0;
1269 } 1278 }
1270 1279
1271 if (c->x86_vendor == X86_VENDOR_INTEL) { 1280 if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1278,8 +1287,8 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
1278 * valid event later, merely don't write CTL0. 1287 * valid event later, merely don't write CTL0.
1279 */ 1288 */
1280 1289
1281 if (c->x86 == 6 && c->x86_model < 0x1A) 1290 if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
1282 __set_bit(0, &dont_init_banks); 1291 mce_banks[0].init = 0;
1283 1292
1284 /* 1293 /*
1285 * All newer Intel systems support MCE broadcasting. Enable 1294 * All newer Intel systems support MCE broadcasting. Enable
@@ -1335,7 +1344,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
1335static void mce_init_timer(void) 1344static void mce_init_timer(void)
1336{ 1345{
1337 struct timer_list *t = &__get_cpu_var(mce_timer); 1346 struct timer_list *t = &__get_cpu_var(mce_timer);
1338 int *n = &__get_cpu_var(next_interval); 1347 int *n = &__get_cpu_var(mce_next_interval);
1339 1348
1340 if (mce_ignore_ce) 1349 if (mce_ignore_ce)
1341 return; 1350 return;
@@ -1348,6 +1357,17 @@ static void mce_init_timer(void)
1348 add_timer_on(t, smp_processor_id()); 1357 add_timer_on(t, smp_processor_id());
1349} 1358}
1350 1359
1360/* Handle unconfigured int18 (should never happen) */
1361static void unexpected_machine_check(struct pt_regs *regs, long error_code)
1362{
1363 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
1364 smp_processor_id());
1365}
1366
1367/* Call the installed machine check handler for this CPU setup. */
1368void (*machine_check_vector)(struct pt_regs *, long error_code) =
1369 unexpected_machine_check;
1370
1351/* 1371/*
1352 * Called for each booted CPU to set up machine checks. 1372 * Called for each booted CPU to set up machine checks.
1353 * Must be called with preempt off: 1373 * Must be called with preempt off:
@@ -1561,8 +1581,10 @@ static struct miscdevice mce_log_device = {
1561 */ 1581 */
1562static int __init mcheck_enable(char *str) 1582static int __init mcheck_enable(char *str)
1563{ 1583{
1564 if (*str == 0) 1584 if (*str == 0) {
1565 enable_p5_mce(); 1585 enable_p5_mce();
1586 return 1;
1587 }
1566 if (*str == '=') 1588 if (*str == '=')
1567 str++; 1589 str++;
1568 if (!strcmp(str, "off")) 1590 if (!strcmp(str, "off"))
@@ -1603,8 +1625,9 @@ static int mce_disable(void)
1603 int i; 1625 int i;
1604 1626
1605 for (i = 0; i < banks; i++) { 1627 for (i = 0; i < banks; i++) {
1606 if (!skip_bank_init(i)) 1628 struct mce_bank *b = &mce_banks[i];
1607 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); 1629 if (b->init)
1630 wrmsrl(MSR_IA32_MCx_CTL(i), 0);
1608 } 1631 }
1609 return 0; 1632 return 0;
1610} 1633}
@@ -1679,14 +1702,15 @@ DEFINE_PER_CPU(struct sys_device, mce_dev);
1679__cpuinitdata 1702__cpuinitdata
1680void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 1703void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
1681 1704
1682static struct sysdev_attribute *bank_attrs; 1705static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr)
1706{
1707 return container_of(attr, struct mce_bank, attr);
1708}
1683 1709
1684static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, 1710static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
1685 char *buf) 1711 char *buf)
1686{ 1712{
1687 u64 b = bank[attr - bank_attrs]; 1713 return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
1688
1689 return sprintf(buf, "%llx\n", b);
1690} 1714}
1691 1715
1692static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, 1716static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
@@ -1697,7 +1721,7 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
1697 if (strict_strtoull(buf, 0, &new) < 0) 1721 if (strict_strtoull(buf, 0, &new) < 0)
1698 return -EINVAL; 1722 return -EINVAL;
1699 1723
1700 bank[attr - bank_attrs] = new; 1724 attr_to_bank(attr)->ctl = new;
1701 mce_restart(); 1725 mce_restart();
1702 1726
1703 return size; 1727 return size;
@@ -1839,7 +1863,7 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1839 } 1863 }
1840 for (j = 0; j < banks; j++) { 1864 for (j = 0; j < banks; j++) {
1841 err = sysdev_create_file(&per_cpu(mce_dev, cpu), 1865 err = sysdev_create_file(&per_cpu(mce_dev, cpu),
1842 &bank_attrs[j]); 1866 &mce_banks[j].attr);
1843 if (err) 1867 if (err)
1844 goto error2; 1868 goto error2;
1845 } 1869 }
@@ -1848,10 +1872,10 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1848 return 0; 1872 return 0;
1849error2: 1873error2:
1850 while (--j >= 0) 1874 while (--j >= 0)
1851 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); 1875 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
1852error: 1876error:
1853 while (--i >= 0) 1877 while (--i >= 0)
1854 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); 1878 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
1855 1879
1856 sysdev_unregister(&per_cpu(mce_dev, cpu)); 1880 sysdev_unregister(&per_cpu(mce_dev, cpu));
1857 1881
@@ -1869,7 +1893,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
1869 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); 1893 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1870 1894
1871 for (i = 0; i < banks; i++) 1895 for (i = 0; i < banks; i++)
1872 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); 1896 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
1873 1897
1874 sysdev_unregister(&per_cpu(mce_dev, cpu)); 1898 sysdev_unregister(&per_cpu(mce_dev, cpu));
1875 cpumask_clear_cpu(cpu, mce_dev_initialized); 1899 cpumask_clear_cpu(cpu, mce_dev_initialized);
@@ -1886,8 +1910,9 @@ static void mce_disable_cpu(void *h)
1886 if (!(action & CPU_TASKS_FROZEN)) 1910 if (!(action & CPU_TASKS_FROZEN))
1887 cmci_clear(); 1911 cmci_clear();
1888 for (i = 0; i < banks; i++) { 1912 for (i = 0; i < banks; i++) {
1889 if (!skip_bank_init(i)) 1913 struct mce_bank *b = &mce_banks[i];
1890 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); 1914 if (b->init)
1915 wrmsrl(MSR_IA32_MCx_CTL(i), 0);
1891 } 1916 }
1892} 1917}
1893 1918
@@ -1902,8 +1927,9 @@ static void mce_reenable_cpu(void *h)
1902 if (!(action & CPU_TASKS_FROZEN)) 1927 if (!(action & CPU_TASKS_FROZEN))
1903 cmci_reenable(); 1928 cmci_reenable();
1904 for (i = 0; i < banks; i++) { 1929 for (i = 0; i < banks; i++) {
1905 if (!skip_bank_init(i)) 1930 struct mce_bank *b = &mce_banks[i];
1906 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); 1931 if (b->init)
1932 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
1907 } 1933 }
1908} 1934}
1909 1935
@@ -1935,7 +1961,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
1935 case CPU_DOWN_FAILED: 1961 case CPU_DOWN_FAILED:
1936 case CPU_DOWN_FAILED_FROZEN: 1962 case CPU_DOWN_FAILED_FROZEN:
1937 t->expires = round_jiffies(jiffies + 1963 t->expires = round_jiffies(jiffies +
1938 __get_cpu_var(next_interval)); 1964 __get_cpu_var(mce_next_interval));
1939 add_timer_on(t, cpu); 1965 add_timer_on(t, cpu);
1940 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); 1966 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
1941 break; 1967 break;
@@ -1951,35 +1977,21 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
1951 .notifier_call = mce_cpu_callback, 1977 .notifier_call = mce_cpu_callback,
1952}; 1978};
1953 1979
1954static __init int mce_init_banks(void) 1980static __init void mce_init_banks(void)
1955{ 1981{
1956 int i; 1982 int i;
1957 1983
1958 bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
1959 GFP_KERNEL);
1960 if (!bank_attrs)
1961 return -ENOMEM;
1962
1963 for (i = 0; i < banks; i++) { 1984 for (i = 0; i < banks; i++) {
1964 struct sysdev_attribute *a = &bank_attrs[i]; 1985 struct mce_bank *b = &mce_banks[i];
1986 struct sysdev_attribute *a = &b->attr;
1965 1987
1966 a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); 1988 a->attr.name = b->attrname;
1967 if (!a->attr.name) 1989 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
1968 goto nomem;
1969 1990
1970 a->attr.mode = 0644; 1991 a->attr.mode = 0644;
1971 a->show = show_bank; 1992 a->show = show_bank;
1972 a->store = set_bank; 1993 a->store = set_bank;
1973 } 1994 }
1974 return 0;
1975
1976nomem:
1977 while (--i >= 0)
1978 kfree(bank_attrs[i].attr.name);
1979 kfree(bank_attrs);
1980 bank_attrs = NULL;
1981
1982 return -ENOMEM;
1983} 1995}
1984 1996
1985static __init int mce_init_device(void) 1997static __init int mce_init_device(void)
@@ -1992,9 +2004,7 @@ static __init int mce_init_device(void)
1992 2004
1993 zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); 2005 zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
1994 2006
1995 err = mce_init_banks(); 2007 mce_init_banks();
1996 if (err)
1997 return err;
1998 2008
1999 err = sysdev_class_register(&mce_sysclass); 2009 err = sysdev_class_register(&mce_sysclass);
2000 if (err) 2010 if (err)
@@ -2014,57 +2024,65 @@ static __init int mce_init_device(void)
2014 2024
2015device_initcall(mce_init_device); 2025device_initcall(mce_init_device);
2016 2026
2017#else /* CONFIG_X86_OLD_MCE: */ 2027/*
2018 2028 * Old style boot options parsing. Only for compatibility.
2019int nr_mce_banks; 2029 */
2020EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ 2030static int __init mcheck_disable(char *str)
2031{
2032 mce_disabled = 1;
2033 return 1;
2034}
2035__setup("nomce", mcheck_disable);
2021 2036
2022/* This has to be run for each processor */ 2037#ifdef CONFIG_DEBUG_FS
2023void mcheck_init(struct cpuinfo_x86 *c) 2038struct dentry *mce_get_debugfs_dir(void)
2024{ 2039{
2025 if (mce_disabled) 2040 static struct dentry *dmce;
2026 return;
2027 2041
2028 switch (c->x86_vendor) { 2042 if (!dmce)
2029 case X86_VENDOR_AMD: 2043 dmce = debugfs_create_dir("mce", NULL);
2030 amd_mcheck_init(c);
2031 break;
2032 2044
2033 case X86_VENDOR_INTEL: 2045 return dmce;
2034 if (c->x86 == 5) 2046}
2035 intel_p5_mcheck_init(c);
2036 if (c->x86 == 6)
2037 intel_p6_mcheck_init(c);
2038 if (c->x86 == 15)
2039 intel_p4_mcheck_init(c);
2040 break;
2041 2047
2042 case X86_VENDOR_CENTAUR: 2048static void mce_reset(void)
2043 if (c->x86 == 5) 2049{
2044 winchip_mcheck_init(c); 2050 cpu_missing = 0;
2045 break; 2051 atomic_set(&mce_fake_paniced, 0);
2052 atomic_set(&mce_executing, 0);
2053 atomic_set(&mce_callin, 0);
2054 atomic_set(&global_nwo, 0);
2055}
2046 2056
2047 default: 2057static int fake_panic_get(void *data, u64 *val)
2048 break; 2058{
2049 } 2059 *val = fake_panic;
2050 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); 2060 return 0;
2051} 2061}
2052 2062
2053static int __init mcheck_enable(char *str) 2063static int fake_panic_set(void *data, u64 val)
2054{ 2064{
2055 mce_p5_enabled = 1; 2065 mce_reset();
2056 return 1; 2066 fake_panic = val;
2067 return 0;
2057} 2068}
2058__setup("mce", mcheck_enable);
2059 2069
2060#endif /* CONFIG_X86_OLD_MCE */ 2070DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
2071 fake_panic_set, "%llu\n");
2061 2072
2062/* 2073static int __init mce_debugfs_init(void)
2063 * Old style boot options parsing. Only for compatibility.
2064 */
2065static int __init mcheck_disable(char *str)
2066{ 2074{
2067 mce_disabled = 1; 2075 struct dentry *dmce, *ffake_panic;
2068 return 1; 2076
2077 dmce = mce_get_debugfs_dir();
2078 if (!dmce)
2079 return -ENOMEM;
2080 ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL,
2081 &fake_panic_fops);
2082 if (!ffake_panic)
2083 return -ENOMEM;
2084
2085 return 0;
2069} 2086}
2070__setup("nomce", mcheck_disable); 2087late_initcall(mce_debugfs_init);
2088#endif
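
Throughout the mce.c hunks above and the mce_intel.c hunks below, open-coded bank MSR arithmetic such as MSR_IA32_MC0_STATUS + i*4 is replaced by MSR_IA32_MCx_*() accessors. Those macros are defined in the MSR index header, which is not part of this diff; they presumably expand along these lines (note that the CMCI CTL2 registers are contiguous with stride 1, while CTL/STATUS/ADDR/MISC come in groups of four per bank):

	/* Presumed definitions -- the authoritative ones live in the
	 * asm/msr-index.h header, not in this diff. */
	#define MSR_IA32_MCx_CTL(x)	(MSR_IA32_MC0_CTL + 4*(x))
	#define MSR_IA32_MCx_STATUS(x)	(MSR_IA32_MC0_STATUS + 4*(x))
	#define MSR_IA32_MCx_ADDR(x)	(MSR_IA32_MC0_ADDR + 4*(x))
	#define MSR_IA32_MCx_MISC(x)	(MSR_IA32_MC0_MISC + 4*(x))
	#define MSR_IA32_MCx_CTL2(x)	(MSR_IA32_MC0_CTL2 + (x))
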
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 1fecba404fd8..8cd5224943b5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -69,7 +69,7 @@ struct threshold_bank {
 	struct threshold_block	*blocks;
 	cpumask_var_t		cpus;
 };
-static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
+static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
 
 #ifdef CONFIG_SMP
 static unsigned char shared_bank[NR_BANKS] = {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index e1acec0f7a32..889f665fe93d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -90,7 +90,7 @@ static void cmci_discover(int banks, int boot)
 		if (test_bit(i, owned))
 			continue;
 
-		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
 
 		/* Already owned by someone else? */
 		if (val & CMCI_EN) {
@@ -101,8 +101,8 @@ static void cmci_discover(int banks, int boot)
 		}
 
 		val |= CMCI_EN | CMCI_THRESHOLD;
-		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
-		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
+		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
 
 		/* Did the enable bit stick? -- the bank supports CMCI */
 		if (val & CMCI_EN) {
@@ -152,9 +152,9 @@ void cmci_clear(void)
 		if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
 			continue;
 		/* Disable CMCI */
-		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
 		val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
-		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
 		__clear_bit(i, __get_cpu_var(mce_banks_owned));
 	}
 	spin_unlock_irqrestore(&cmci_discover_lock, flags);
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
deleted file mode 100644
index f5f2d6f71fb6..000000000000
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ /dev/null
@@ -1,94 +0,0 @@
1/*
2 * Non Fatal Machine Check Exception Reporting
3 *
4 * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
5 *
6 * This file contains routines to check for non-fatal MCEs every 15s
7 *
8 */
9#include <linux/interrupt.h>
10#include <linux/workqueue.h>
11#include <linux/jiffies.h>
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/init.h>
16#include <linux/smp.h>
17
18#include <asm/processor.h>
19#include <asm/system.h>
20#include <asm/mce.h>
21#include <asm/msr.h>
22
23static int firstbank;
24
25#define MCE_RATE (15*HZ) /* timer rate is 15s */
26
27static void mce_checkregs(void *info)
28{
29 u32 low, high;
30 int i;
31
32 for (i = firstbank; i < nr_mce_banks; i++) {
33 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
34
35 if (!(high & (1<<31)))
36 continue;
37
38 printk(KERN_INFO "MCE: The hardware reports a non fatal, "
39 "correctable incident occurred on CPU %d.\n",
40 smp_processor_id());
41
42 printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
43
44 /*
45 * Scrub the error so we don't pick it up in MCE_RATE
46 * seconds time:
47 */
48 wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
49
50 /* Serialize: */
51 wmb();
52 add_taint(TAINT_MACHINE_CHECK);
53 }
54}
55
56static void mce_work_fn(struct work_struct *work);
57static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
58
59static void mce_work_fn(struct work_struct *work)
60{
61 on_each_cpu(mce_checkregs, NULL, 1);
62 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
63}
64
65static int __init init_nonfatal_mce_checker(void)
66{
67 struct cpuinfo_x86 *c = &boot_cpu_data;
68
69 /* Check for MCE support */
70 if (!cpu_has(c, X86_FEATURE_MCE))
71 return -ENODEV;
72
73 /* Check for PPro style MCA */
74 if (!cpu_has(c, X86_FEATURE_MCA))
75 return -ENODEV;
76
77 /* Some Athlons misbehave when we frob bank 0 */
78 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
79 boot_cpu_data.x86 == 6)
80 firstbank = 1;
81 else
82 firstbank = 0;
83
84 /*
85 * Check for non-fatal errors every MCE_RATE s
86 */
87 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
88 printk(KERN_INFO "Machine check exception polling timer started.\n");
89
90 return 0;
91}
92module_init(init_nonfatal_mce_checker);
93
94MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
deleted file mode 100644
index 4482aea9aa2e..000000000000
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ /dev/null
@@ -1,163 +0,0 @@
1/*
2 * P4 specific Machine Check Exception Reporting
3 */
4#include <linux/kernel.h>
5#include <linux/types.h>
6#include <linux/init.h>
7#include <linux/smp.h>
8
9#include <asm/processor.h>
10#include <asm/mce.h>
11#include <asm/msr.h>
12
13/* as supported by the P4/Xeon family */
14struct intel_mce_extended_msrs {
15 u32 eax;
16 u32 ebx;
17 u32 ecx;
18 u32 edx;
19 u32 esi;
20 u32 edi;
21 u32 ebp;
22 u32 esp;
23 u32 eflags;
24 u32 eip;
25 /* u32 *reserved[]; */
26};
27
28static int mce_num_extended_msrs;
29
30/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
31static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
32{
33 u32 h;
34
35 rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
36 rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
37 rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
38 rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
39 rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
40 rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
41 rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
42 rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
43 rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
44 rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
45}
46
47static void intel_machine_check(struct pt_regs *regs, long error_code)
48{
49 u32 alow, ahigh, high, low;
50 u32 mcgstl, mcgsth;
51 int recover = 1;
52 int i;
53
54 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
55 if (mcgstl & (1<<0)) /* Recoverable ? */
56 recover = 0;
57
58 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
59 smp_processor_id(), mcgsth, mcgstl);
60
61 if (mce_num_extended_msrs > 0) {
62 struct intel_mce_extended_msrs dbg;
63
64 intel_get_extended_msrs(&dbg);
65
66 printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
67 "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
68 "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
69 smp_processor_id(), dbg.eip, dbg.eflags,
70 dbg.eax, dbg.ebx, dbg.ecx, dbg.edx,
71 dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
72 }
73
74 for (i = 0; i < nr_mce_banks; i++) {
75 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
76 if (high & (1<<31)) {
77 char misc[20];
78 char addr[24];
79
80 misc[0] = addr[0] = '\0';
81 if (high & (1<<29))
82 recover |= 1;
83 if (high & (1<<25))
84 recover |= 2;
85 high &= ~(1<<31);
86 if (high & (1<<27)) {
87 rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
88 snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
89 }
90 if (high & (1<<26)) {
91 rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
92 snprintf(addr, 24, " at %08x%08x", ahigh, alow);
93 }
94 printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
95 smp_processor_id(), i, high, low, misc, addr);
96 }
97 }
98
99 if (recover & 2)
100 panic("CPU context corrupt");
101 if (recover & 1)
102 panic("Unable to continue");
103
104 printk(KERN_EMERG "Attempting to continue.\n");
105
106 /*
107 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
108 * recoverable/continuable. This will allow the BIOS to look at the MSRs
109 * for errors if the OS could not log the error.
110 */
111 for (i = 0; i < nr_mce_banks; i++) {
112 u32 msr;
113 msr = MSR_IA32_MC0_STATUS+i*4;
114 rdmsr(msr, low, high);
115 if (high&(1<<31)) {
116 /* Clear it */
117 wrmsr(msr, 0UL, 0UL);
118 /* Serialize */
119 wmb();
120 add_taint(TAINT_MACHINE_CHECK);
121 }
122 }
123 mcgstl &= ~(1<<2);
124 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
125}
126
127void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
128{
129 u32 l, h;
130 int i;
131
132 machine_check_vector = intel_machine_check;
133 wmb();
134
135 printk(KERN_INFO "Intel machine check architecture supported.\n");
136 rdmsr(MSR_IA32_MCG_CAP, l, h);
137 if (l & (1<<8)) /* Control register present ? */
138 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
139 nr_mce_banks = l & 0xff;
140
141 for (i = 0; i < nr_mce_banks; i++) {
142 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
143 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
144 }
145
146 set_in_cr4(X86_CR4_MCE);
147 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
148 smp_processor_id());
149
150 /* Check for P4/Xeon extended MCE MSRs */
151 rdmsr(MSR_IA32_MCG_CAP, l, h);
152 if (l & (1<<9)) {/* MCG_EXT_P */
153 mce_num_extended_msrs = (l >> 16) & 0xff;
154 printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
155 " available\n",
156 smp_processor_id(), mce_num_extended_msrs);
157
158#ifdef CONFIG_X86_MCE_P4THERMAL
159 /* Check for P4/Xeon Thermal monitor */
160 intel_init_thermal(c);
161#endif
162 }
163}
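The deleted handler's severity policy is a two-bit accumulator: bit 0 means "unable to continue" (restart IP invalid, or a bank reporting an uncorrected error), bit 1 means "CPU context corrupt". A compressed, standalone sketch of that decision using the same bit positions as the code above (illustrative only, not kernel code):

	#include <stdint.h>

	#define MCG_RIPV	(1u << 0)	/* low word of MCG_STATUS: restart IP valid   */
	#define MCI_UC		(1u << 29)	/* high word of MCi_STATUS: uncorrected error */
	#define MCI_PCC		(1u << 25)	/* high word: processor context corrupt       */

	static int classify(uint32_t mcg_status_low, const uint32_t *status_high, int banks)
	{
		int recover = (mcg_status_low & MCG_RIPV) ? 0 : 1;
		int i;

		for (i = 0; i < banks; i++) {
			if (!(status_high[i] & (1u << 31)))	/* bank has nothing logged */
				continue;
			if (status_high[i] & MCI_UC)
				recover |= 1;	/* panic("Unable to continue")  */
			if (status_high[i] & MCI_PCC)
				recover |= 2;	/* panic("CPU context corrupt") */
		}
		return recover;		/* non-zero means the handler panics */
	}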
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
deleted file mode 100644
index 01e4f8178183..000000000000
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ /dev/null
@@ -1,127 +0,0 @@
1/*
2 * P6 specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
4 */
5#include <linux/interrupt.h>
6#include <linux/kernel.h>
7#include <linux/types.h>
8#include <linux/init.h>
9#include <linux/smp.h>
10
11#include <asm/processor.h>
12#include <asm/system.h>
13#include <asm/mce.h>
14#include <asm/msr.h>
15
16/* Machine Check Handler For PII/PIII */
17static void intel_machine_check(struct pt_regs *regs, long error_code)
18{
19 u32 alow, ahigh, high, low;
20 u32 mcgstl, mcgsth;
21 int recover = 1;
22 int i;
23
24 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
25 if (mcgstl & (1<<0)) /* Recoverable ? */
26 recover = 0;
27
28 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
29 smp_processor_id(), mcgsth, mcgstl);
30
31 for (i = 0; i < nr_mce_banks; i++) {
32 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
33 if (high & (1<<31)) {
34 char misc[20];
35 char addr[24];
36
37 misc[0] = '\0';
38 addr[0] = '\0';
39
40 if (high & (1<<29))
41 recover |= 1;
42 if (high & (1<<25))
43 recover |= 2;
44 high &= ~(1<<31);
45
46 if (high & (1<<27)) {
47 rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
48 snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
49 }
50 if (high & (1<<26)) {
51 rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
52 snprintf(addr, 24, " at %08x%08x", ahigh, alow);
53 }
54
55 printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
56 smp_processor_id(), i, high, low, misc, addr);
57 }
58 }
59
60 if (recover & 2)
61 panic("CPU context corrupt");
62 if (recover & 1)
63 panic("Unable to continue");
64
65 printk(KERN_EMERG "Attempting to continue.\n");
66 /*
67 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
68 * recoverable/continuable. This will allow the BIOS to look at the MSRs
69 * for errors if the OS could not log the error:
70 */
71 for (i = 0; i < nr_mce_banks; i++) {
72 unsigned int msr;
73
74 msr = MSR_IA32_MC0_STATUS+i*4;
75 rdmsr(msr, low, high);
76 if (high & (1<<31)) {
77 /* Clear it: */
78 wrmsr(msr, 0UL, 0UL);
79 /* Serialize: */
80 wmb();
81 add_taint(TAINT_MACHINE_CHECK);
82 }
83 }
84 mcgstl &= ~(1<<2);
85 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
86}
87
88/* Set up machine check reporting for processors with Intel style MCE: */
89void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
90{
91 u32 l, h;
92 int i;
93
94 /* Check for MCE support */
95 if (!cpu_has(c, X86_FEATURE_MCE))
96 return;
97
98 /* Check for PPro style MCA */
99 if (!cpu_has(c, X86_FEATURE_MCA))
100 return;
101
102 /* Ok machine check is available */
103 machine_check_vector = intel_machine_check;
104 /* Make sure the vector pointer is visible before we enable MCEs: */
105 wmb();
106
107 printk(KERN_INFO "Intel machine check architecture supported.\n");
108 rdmsr(MSR_IA32_MCG_CAP, l, h);
109 if (l & (1<<8)) /* Control register present ? */
110 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
111 nr_mce_banks = l & 0xff;
112
113 /*
114 * Following the example in IA-32 SDM Vol 3:
115 * - MC0_CTL should not be written
116 * - Status registers on all banks should be cleared on reset
117 */
118 for (i = 1; i < nr_mce_banks; i++)
119 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
120
121 for (i = 0; i < nr_mce_banks; i++)
122 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
123
124 set_in_cr4(X86_CR4_MCE);
125 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
126 smp_processor_id());
127}
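Both deleted init routines derive the bank count and the optional capability bits from the low word of IA32_MCG_CAP. A sketch of that field decoding, matching the masks and shifts used in the code above (standalone, not kernel code):

	#include <stdint.h>

	struct mcg_cap {
		unsigned int banks;		/* bits 7:0   - number of MCA banks      */
		unsigned int ctl_present;	/* bit  8     - IA32_MCG_CTL implemented  */
		unsigned int ext_present;	/* bit  9     - extended state MSRs exist */
		unsigned int ext_count;		/* bits 23:16 - number of extended MSRs   */
	};

	static struct mcg_cap decode_mcg_cap(uint32_t low)
	{
		struct mcg_cap c;

		c.banks       = low & 0xff;
		c.ctl_present = (low >> 8) & 1;
		c.ext_present = (low >> 9) & 1;
		c.ext_count   = (low >> 16) & 0xff;
		return c;
	}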
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 5957a93e5173..63a56d147e4a 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -260,9 +260,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
260 return; 260 return;
261 } 261 }
262 262
263 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
264 tm2 = 1;
265
266 /* Check whether a vector already exists */ 263 /* Check whether a vector already exists */
267 if (h & APIC_VECTOR_MASK) { 264 if (h & APIC_VECTOR_MASK) {
268 printk(KERN_DEBUG 265 printk(KERN_DEBUG
@@ -271,6 +268,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
271 return; 268 return;
272 } 269 }
273 270
271	/* early Pentium M models use a different method for enabling TM2 */
272 if (cpu_has(c, X86_FEATURE_TM2)) {
273 if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
274 rdmsr(MSR_THERM2_CTL, l, h);
275 if (l & MSR_THERM2_CTL_TM_SELECT)
276 tm2 = 1;
277 } else if (l & MSR_IA32_MISC_ENABLE_TM2)
278 tm2 = 1;
279 }
280
274 /* We'll mask the thermal vector in the lapic till we're ready: */ 281 /* We'll mask the thermal vector in the lapic till we're ready: */
275 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; 282 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
276 apic_write(APIC_LVTTHMR, h); 283 apic_write(APIC_LVTTHMR, h);
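The hunk above splits TM2 detection in two: early Pentium M (family 6, models 9 and 13) signals TM2 through THERM2_CTL.TM_SELECT, later parts through the TM2 bit in IA32_MISC_ENABLE. A standalone sketch of that decision; the helper and its arguments are invented for illustration, and the numeric bit positions are taken from the kernel's MSR definitions of the era, so treat them as assumptions:

	#include <stdbool.h>
	#include <stdint.h>

	static bool tm2_enabled(unsigned int family, unsigned int model,
				uint32_t misc_enable_low, uint32_t therm2_ctl_low,
				bool has_tm2_feature)
	{
		if (!has_tm2_feature)
			return false;

		if (family == 6 && (model == 9 || model == 13))
			return therm2_ctl_low & (1u << 16);	/* THERM2_CTL.TM_SELECT */

		return misc_enable_low & (1u << 13);		/* MISC_ENABLE.TM2      */
	}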
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 7af0f88a4163..84e83de54575 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -58,6 +58,7 @@ unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
58static DEFINE_MUTEX(mtrr_mutex); 58static DEFINE_MUTEX(mtrr_mutex);
59 59
60u64 size_or_mask, size_and_mask; 60u64 size_or_mask, size_and_mask;
61static bool mtrr_aps_delayed_init;
61 62
62static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; 63static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
63 64
@@ -163,7 +164,10 @@ static void ipi_handler(void *info)
163 if (data->smp_reg != ~0U) { 164 if (data->smp_reg != ~0U) {
164 mtrr_if->set(data->smp_reg, data->smp_base, 165 mtrr_if->set(data->smp_reg, data->smp_base,
165 data->smp_size, data->smp_type); 166 data->smp_size, data->smp_type);
166 } else { 167 } else if (mtrr_aps_delayed_init) {
168 /*
169	 * Initialize the MTRRs in addition to the synchronisation.
170 */
167 mtrr_if->set_all(); 171 mtrr_if->set_all();
168 } 172 }
169 173
@@ -265,6 +269,8 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
265 */ 269 */
266 if (reg != ~0U) 270 if (reg != ~0U)
267 mtrr_if->set(reg, base, size, type); 271 mtrr_if->set(reg, base, size, type);
272 else if (!mtrr_aps_delayed_init)
273 mtrr_if->set_all();
268 274
269 /* Wait for the others */ 275 /* Wait for the others */
270 while (atomic_read(&data.count)) 276 while (atomic_read(&data.count))
@@ -721,9 +727,7 @@ void __init mtrr_bp_init(void)
721 727
722void mtrr_ap_init(void) 728void mtrr_ap_init(void)
723{ 729{
724 unsigned long flags; 730 if (!use_intel() || mtrr_aps_delayed_init)
725
726 if (!mtrr_if || !use_intel())
727 return; 731 return;
728 /* 732 /*
729 * Ideally we should hold mtrr_mutex here to avoid mtrr entries 733 * Ideally we should hold mtrr_mutex here to avoid mtrr entries
@@ -738,11 +742,7 @@ void mtrr_ap_init(void)
738 * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug 742 * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
739 * lock to prevent mtrr entry changes 743 * lock to prevent mtrr entry changes
740 */ 744 */
741 local_irq_save(flags); 745 set_mtrr(~0U, 0, 0, 0);
742
743 mtrr_if->set_all();
744
745 local_irq_restore(flags);
746} 746}
747 747
748/** 748/**
@@ -753,6 +753,34 @@ void mtrr_save_state(void)
753 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); 753 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
754} 754}
755 755
756void set_mtrr_aps_delayed_init(void)
757{
758 if (!use_intel())
759 return;
760
761 mtrr_aps_delayed_init = true;
762}
763
764/*
765 * MTRR initialization for all APs
766 */
767void mtrr_aps_init(void)
768{
769 if (!use_intel())
770 return;
771
772 set_mtrr(~0U, 0, 0, 0);
773 mtrr_aps_delayed_init = false;
774}
775
776void mtrr_bp_restore(void)
777{
778 if (!use_intel())
779 return;
780
781 mtrr_if->set_all();
782}
783
756static int __init mtrr_init_finialize(void) 784static int __init mtrr_init_finialize(void)
757{ 785{
758 if (!mtrr_if) 786 if (!mtrr_if)
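The mtrr/main.c changes above introduce a delayed-init mode: the boot CPU sets mtrr_aps_delayed_init before onlining the APs, each AP's mtrr_ap_init() then returns early, and a single later rendezvous programs every AP at once. A sketch of the intended ordering of the new hooks; the real call sites live in the CPU bring-up and resume paths outside this diff, so this caller is illustrative rather than a quote of them:

	extern void set_mtrr_aps_delayed_init(void);
	extern void mtrr_aps_init(void);

	static void bring_up_aps_with_delayed_mtrr_init(void)
	{
		set_mtrr_aps_delayed_init();	/* APs skip per-CPU MTRR programming */

		/* ... online the secondary CPUs here; each AP's
		 * mtrr_ap_init() now returns early ... */

		mtrr_aps_init();		/* one set_mtrr(~0U, ...) rendezvous
						 * programs every AP, then clears the flag */
	}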
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index f9cd0849bd42..2732e2c1e4d3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1211,7 +1211,7 @@ amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
1211 x86_pmu_disable_counter(hwc, idx); 1211 x86_pmu_disable_counter(hwc, idx);
1212} 1212}
1213 1213
1214static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); 1214static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1215 1215
1216/* 1216/*
1217 * Set the next IRQ period, based on the hwc->period_left value. 1217 * Set the next IRQ period, based on the hwc->period_left value.
@@ -1253,7 +1253,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
1253 if (left > x86_pmu.max_period) 1253 if (left > x86_pmu.max_period)
1254 left = x86_pmu.max_period; 1254 left = x86_pmu.max_period;
1255 1255
1256 per_cpu(prev_left[idx], smp_processor_id()) = left; 1256 per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
1257 1257
1258 /* 1258 /*
1259 * The hw counter starts counting from this counter offset, 1259 * The hw counter starts counting from this counter offset,
@@ -1470,7 +1470,7 @@ void perf_counter_print_debug(void)
1470 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1470 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1471 rdmsrl(x86_pmu.perfctr + idx, pmc_count); 1471 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
1472 1472
1473 prev_left = per_cpu(prev_left[idx], cpu); 1473 prev_left = per_cpu(pmc_prev_left[idx], cpu);
1474 1474
1475 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", 1475 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
1476 cpu, idx, pmc_ctrl); 1476 cpu, idx, pmc_ctrl);
@@ -2110,8 +2110,8 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2110 entry->ip[entry->nr++] = ip; 2110 entry->ip[entry->nr++] = ip;
2111} 2111}
2112 2112
2113static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); 2113static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2114static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); 2114static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2115static DEFINE_PER_CPU(int, in_nmi_frame); 2115static DEFINE_PER_CPU(int, in_nmi_frame);
2116 2116
2117 2117
@@ -2264,9 +2264,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2264 struct perf_callchain_entry *entry; 2264 struct perf_callchain_entry *entry;
2265 2265
2266 if (in_nmi()) 2266 if (in_nmi())
2267 entry = &__get_cpu_var(nmi_entry); 2267 entry = &__get_cpu_var(pmc_nmi_entry);
2268 else 2268 else
2269 entry = &__get_cpu_var(irq_entry); 2269 entry = &__get_cpu_var(pmc_irq_entry);
2270 2270
2271 entry->nr = 0; 2271 entry->nr = 0;
2272 2272
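The perf_counter.c hunks above rename the per-CPU symbols and move the array dimension into the type argument of DEFINE_PER_CPU, while element access stays the same. A small kernel-style sketch of that convention; the names below are placeholders, not kernel symbols:

	#include <linux/percpu.h>
	#include <linux/smp.h>
	#include <linux/types.h>

	/* Array type first, prefixed per-CPU name second. */
	static DEFINE_PER_CPU(u64 [8], demo_prev_left);

	static void demo_store(int idx, u64 left)
	{
		/* Element access is unchanged: index the name, then pick the CPU. */
		per_cpu(demo_prev_left[idx], smp_processor_id()) = left;
	}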
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c
new file mode 100644
index 000000000000..a640ae5ad201
--- /dev/null
+++ b/arch/x86/kernel/cpu/sched.c
@@ -0,0 +1,55 @@
1#include <linux/sched.h>
2#include <linux/math64.h>
3#include <linux/percpu.h>
4#include <linux/irqflags.h>
5
6#include <asm/cpufeature.h>
7#include <asm/processor.h>
8
9#ifdef CONFIG_SMP
10
11static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched);
12
13static unsigned long scale_aperfmperf(void)
14{
15 struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched);
16 unsigned long ratio, flags;
17
18 local_irq_save(flags);
19 get_aperfmperf(&val);
20 local_irq_restore(flags);
21
22 ratio = calc_aperfmperf_ratio(old, &val);
23 *old = val;
24
25 return ratio;
26}
27
28unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
29{
30 /*
31 * do aperf/mperf on the cpu level because it includes things
32 * like turbo mode, which are relevant to full cores.
33 */
34 if (boot_cpu_has(X86_FEATURE_APERFMPERF))
35 return scale_aperfmperf();
36
37 /*
38 * maybe have something cpufreq here
39 */
40
41 return default_scale_freq_power(sd, cpu);
42}
43
44unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu)
45{
46 /*
47 * aperf/mperf already includes the smt gain
48 */
49 if (boot_cpu_has(X86_FEATURE_APERFMPERF))
50 return SCHED_LOAD_SCALE;
51
52 return default_scale_smt_power(sd, cpu);
53}
54
55#endif
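The new sched.c feeds the scheduler a cpu_power scaling factor derived from the APERF/MPERF deltas since the last call. calc_aperfmperf_ratio() is defined outside this diff, so the formula below (delta APERF scaled by SCHED_LOAD_SCALE over delta MPERF, with 1024 assumed for the scale) is a sketch of what the ratio represents, not a quote of that helper:

	#include <stdint.h>

	#define DEMO_SCHED_LOAD_SCALE	1024UL	/* assumed value of SCHED_LOAD_SCALE */

	static unsigned long demo_aperfmperf_ratio(uint64_t aperf_old, uint64_t aperf_new,
						    uint64_t mperf_old, uint64_t mperf_new)
	{
		uint64_t da = aperf_new - aperf_old;	/* cycles at the actual frequency  */
		uint64_t dm = mperf_new - mperf_old;	/* cycles at the nominal frequency */

		if (!dm)
			return DEMO_SCHED_LOAD_SCALE;	/* no reference cycles elapsed */

		/* >1024 when running above nominal (turbo), <1024 when throttled. */
		return (unsigned long)(da * DEMO_SCHED_LOAD_SCALE / dm);
	}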