10 files changed, 946 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
new file mode 100644
index 000000000000..f1ebe1c1c17a
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -0,0 +1,2 @@
+obj-y   =       mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o
+obj-$(CONFIG_X86_MCE_NONFATAL)  +=      non-fatal.o
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
new file mode 100644
index 000000000000..eef63e3630c2
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -0,0 +1,102 @@
+/*
+ * Athlon/Hammer specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk>
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+#include "mce.h"
+/* Machine Check Handler For AMD Athlon/Duron
+ *
+ * Reads MCG_STATUS, logs it, then walks banks 1..nr_mce_banks-1 (bank 0
+ * is never examined here).  Every bank whose STATUS high word has bit 31
+ * set is logged, cleared and taints the kernel.  Afterwards the handler
+ * panics if any logged bank had bit 25 or bit 29 set, or if bit 0 of
+ * MCG_STATUS was clear; otherwise it clears bit 2 of MCG_STATUS and
+ * returns.  Neither regs nor error_code is used.
+ */
+static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
+{
+        int recover=1;  /* bit 0 set: "Unable to continue"; bit 1 set: "CPU context corrupt" */
+        u32 alow, ahigh, high, low;
+        u32 mcgstl, mcgsth;
+        int i;
+        rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+        if (mcgstl & (1<<0))    /* Recoverable ? */
+                recover=0;
+        printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+                smp_processor_id(), mcgsth, mcgstl);
+        for (i=1; i<nr_mce_banks; i++) {        /* starts at bank 1, so bank 0 is never scanned */
+                rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+                if (high&(1<<31)) {     /* top bit of STATUS: presumably the "entry valid" flag */
+                        if (high & (1<<29))
+                                recover |= 1;
+                        if (high & (1<<25))
+                                recover |= 2;
+                        printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+                        high &= ~(1<<31);
+                        if (high & (1<<27)) {   /* also dump this bank's MISC register */
+                                rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+                                printk ("[%08x%08x]", ahigh, alow);
+                        }
+                        if (high & (1<<26)) {   /* also dump this bank's ADDR register */
+                                rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+                                printk (" at %08x%08x", ahigh, alow);
+                        }
+                        printk ("\n");
+                        /* Clear it */
+                        wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+                        /* Serialize */
+                        wmb();
+                        add_taint(TAINT_MACHINE_CHECK);
+                }
+        }
+        if (recover&2)
+                panic ("CPU context corrupt");
+        if (recover&1)
+                panic ("Unable to continue");
+        printk (KERN_EMERG "Attempting to continue.\n");
+        mcgstl &= ~(1<<2);      /* clear bit 2 of MCG_STATUS before writing it back */
+        wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+/* AMD K7 machine check is Intel like
+ *
+ * Per-CPU setup: does nothing unless c advertises X86_FEATURE_MCE.
+ * Otherwise installs k7_machine_check as the handler, writes all-ones
+ * to MCG_CTL when MCG_CAP bit 8 is set, records the bank count from
+ * the low byte of MCG_CAP in nr_mce_banks, enables and clears the
+ * banks, and finally sets X86_CR4_MCE.
+ */
+void amd_mcheck_init(struct cpuinfo_x86 *c)
+{
+        u32 l, h;
+        int i;
+        if (!cpu_has(c, X86_FEATURE_MCE))
+                return;
+        machine_check_vector = k7_machine_check;
+        wmb();  /* handler pointer is stored before any enable bit below is written */
+        printk (KERN_INFO "Intel machine check architecture supported.\n");
+        rdmsr (MSR_IA32_MCG_CAP, l, h);
+        if (l & (1<<8)) /* Control register present ? */
+                wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+        nr_mce_banks = l & 0xff;
+        /* Clear status for MC index 0 separately, we don't touch CTL,
+         * as some K7 Athlons cause spurious MCEs when it's enabled. */
+        if (boot_cpu_data.x86 == 6) {   /* family 6: bank 0 gets its status cleared only */
+                wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
+                i = 1;
+        } else
+                i = 0;
+        for (; i<nr_mce_banks; i++) {   /* remaining banks: enable all sources, then clear status */
+                wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+                wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+        }
+        set_in_cr4 (X86_CR4_MCE);
+        printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+                smp_processor_id());
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
new file mode 100644
index 000000000000..34c781eddee4
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -0,0 +1,90 @@
+/*
+ * mce.c - x86 Machine Check Exception Reporting
+ * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/mce.h>
+#include "mce.h"
+int mce_disabled = 0;   /* 0: default; 1: set by "nomce", mcheck_init() returns early; -1: set by "mce" */
+int nr_mce_banks;       /* bank count; assigned from MCG_CAP by the k7/p4/p6 init routines */
+EXPORT_SYMBOL_GPL(nr_mce_banks);        /* non-fatal.o */
+/*
+ * Fallback int18 handler, used while no vendor-specific handler has been
+ * installed (should never actually fire).  Only logs the event; regs and
+ * error_code are ignored.
+ */
+static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code)
+{
+        const int this_cpu = smp_processor_id();
+        printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", this_cpu);
+}
+/* Call the installed machine check handler for this CPU setup.
+ * Initially points at unexpected_machine_check; the vendor init
+ * routines in this directory overwrite it with their own handler.
+ */
+void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
+/*
+ * Per-processor machine check setup; has to be run for each processor.
+ * Dispatches to the vendor/family specific init routine.  Does nothing
+ * when mce_disabled is 1 ("nomce") or when no routine matches the CPU.
+ */
+void mcheck_init(struct cpuinfo_x86 *c)
+{
+        if (mce_disabled == 1)
+                return;
+        if (c->x86_vendor == X86_VENDOR_AMD) {
+                amd_mcheck_init(c);
+        } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+                /* one init routine per supported Intel family */
+                switch (c->x86) {
+                case 5:
+                        intel_p5_mcheck_init(c);
+                        break;
+                case 6:
+                        intel_p6_mcheck_init(c);
+                        break;
+                case 15:
+                        intel_p4_mcheck_init(c);
+                        break;
+                default:
+                        break;
+                }
+        } else if (c->x86_vendor == X86_VENDOR_CENTAUR && c->x86 == 5) {
+                winchip_mcheck_init(c);
+        }
+}
+/* CR4 snapshot taken by stop_mce(); tells restart_mce() whether MCE was on. */
+static unsigned long old_cr4 __initdata;
+/* Remember the current CR4 value, then clear X86_CR4_MCE. */
+void __init stop_mce(void)
+{
+        old_cr4 = read_cr4();
+        clear_in_cr4(X86_CR4_MCE);
+}
+/* Set X86_CR4_MCE again, but only if it was set when stop_mce() ran. */
+void __init restart_mce(void)
+{
+        if (!(old_cr4 & X86_CR4_MCE))
+                return;
+        set_in_cr4(X86_CR4_MCE);
+}
+/* "nomce" boot option: makes mcheck_init() return without doing anything. */
+static int __init mcheck_disable(char *str)
+{
+        mce_disabled = 1;
+        return 1;
+}
+__setup("nomce", mcheck_disable);
+/* "mce" boot option: sets the -1 value that intel_p5_mcheck_init() requires. */
+static int __init mcheck_enable(char *str)
+{
+        mce_disabled = -1;
+        return 1;
+}
+__setup("mce", mcheck_enable);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
new file mode 100644
index 000000000000..81fb6e2d35f3
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <asm/mce.h>
+void amd_mcheck_init(struct cpuinfo_x86 *c);            /* k7.c */
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c);       /* p4.c */
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);       /* p5.c */
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c);       /* p6.c */
+void winchip_mcheck_init(struct cpuinfo_x86 *c);        /* winchip.c */
+/* Call the installed machine check handler for this CPU setup.
+ * Defined in mce.c; each init routine above stores its handler here. */
+extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code);
+extern int nr_mce_banks;        /* defined in mce.c; bank count taken from MCG_CAP */
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
new file mode 100644
index 000000000000..bf39409b3838
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -0,0 +1,91 @@
+/*
+ * Non Fatal Machine Check Exception Reporting
+ *
+ * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
+ *
+ * This file contains routines to check for non-fatal MCEs every 15s
+ *
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+#include "mce.h"
+/* First bank index polled by mce_checkregs(); chosen once at init time. */
+static int firstbank;
+/*
+ * Polling period in jiffies.  The expansion is parenthesized so the macro
+ * stays a single operand wherever it is used (e.g. "x % MCE_RATE").
+ */
+#define MCE_RATE        (15*HZ) /* timer rate is 15s */
+/*
+ * Poll the machine check banks of the CPU this runs on (it is invoked
+ * through on_each_cpu()).  Every bank from firstbank up to nr_mce_banks-1
+ * whose STATUS high word has bit 31 set is reported, has its STATUS
+ * register zeroed, and taints the kernel.  The info argument is unused.
+ */
+static void mce_checkregs (void *info)
+{
+        u32 status_lo, status_hi;
+        int bank;
+        for (bank = firstbank; bank < nr_mce_banks; bank++) {
+                rdmsr (MSR_IA32_MC0_STATUS + bank*4, status_lo, status_hi);
+                if (!(status_hi & (1<<31)))
+                        continue;       /* nothing logged in this bank */
+                printk(KERN_INFO "MCE: The hardware reports a non "
+                        "fatal, correctable incident occurred on "
+                        "CPU %d.\n",
+                        smp_processor_id());
+                printk (KERN_INFO "Bank %d: %08x%08x\n", bank, status_hi, status_lo);
+                /* Scrub the error so the next poll does not report it again. */
+                wrmsr (MSR_IA32_MC0_STATUS + bank*4, 0UL, 0UL);
+                /* Serialize */
+                wmb();
+                add_taint(TAINT_MACHINE_CHECK);
+        }
+}
+static void mce_work_fn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
+/* Work handler: poll every CPU once, then queue itself again MCE_RATE later. */
+static void mce_work_fn(struct work_struct *work)
+{
+        unsigned long next_poll;
+        on_each_cpu(mce_checkregs, NULL, 1, 1);
+        next_poll = round_jiffies_relative(MCE_RATE);
+        schedule_delayed_work(&mce_work, next_poll);
+}
+/*
+ * Start the periodic poll for non-fatal machine checks.
+ * Returns -ENODEV when the boot CPU lacks X86_FEATURE_MCE or
+ * X86_FEATURE_MCA, and 0 once the first poll has been scheduled.
+ */
+static int __init init_nonfatal_mce_checker(void)
+{
+        struct cpuinfo_x86 *boot = &boot_cpu_data;
+        /* Need both MCE support and PPro style MCA */
+        if (!cpu_has(boot, X86_FEATURE_MCE) || !cpu_has(boot, X86_FEATURE_MCA))
+                return -ENODEV;
+        /* Some Athlons misbehave when we frob bank 0 */
+        firstbank = (boot->x86_vendor == X86_VENDOR_AMD && boot->x86 == 6) ? 1 : 0;
+        /* Check for non-fatal errors every MCE_RATE s */
+        schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
+        printk(KERN_INFO "Machine check exception polling timer started.\n");
+        return 0;
+}
+module_init(init_nonfatal_mce_checker);
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
new file mode 100644
index 000000000000..1509edfb2313
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -0,0 +1,253 @@
+/*
+ * P4 specific Machine Check Exception Reporting
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/therm_throt.h>
+#include "mce.h"
+/* as supported by the P4/Xeon family
+ * Holds the low 32 bits of the ten MSR_IA32_MCG_* register-image MSRs;
+ * filled by intel_get_extended_msrs() and printed by the P4 handler.
+ */
+struct intel_mce_extended_msrs {
+        u32 eax;
+        u32 ebx;
+        u32 ecx;
+        u32 edx;
+        u32 esi;
+        u32 edi;
+        u32 ebp;
+        u32 esp;
+        u32 eflags;
+        u32 eip;
+        /* u32 *reserved[]; */
+};
+static int mce_num_extended_msrs = 0;   /* MCG_CAP bits 16-23; stays 0 unless intel_p4_mcheck_init() sees MCG_CAP bit 9 */
+#ifdef CONFIG_X86_MCE_P4THERMAL
+/*
+ * Placeholder thermal interrupt handler, in effect until
+ * intel_init_thermal() installs intel_thermal_interrupt.  Logs the event
+ * and taints the kernel; regs is unused.
+ */
+static void unexpected_thermal_interrupt(struct pt_regs *regs)
+{
+        const int this_cpu = smp_processor_id();
+        printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", this_cpu);
+        add_taint(TAINT_MACHINE_CHECK);
+}
+/* P4/Xeon Thermal transition interrupt handler
+ * Acknowledges the local APIC, reads MSR_IA32_THERM_STATUS and hands its
+ * bit 0 to therm_throt_process() as the "condition is current" flag.
+ * regs is unused.
+ */
+static void intel_thermal_interrupt(struct pt_regs *regs)
+{
+        __u64 msr_val;
+        ack_APIC_irq(); /* acked first; therm_throt_process() expects to run after the ack */
+        rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+        therm_throt_process(msr_val & 0x1);
+}
+/* Thermal interrupt handler for this CPU setup
+ * (starts as unexpected_thermal_interrupt; intel_init_thermal() swaps in
+ * intel_thermal_interrupt once thermal monitoring is set up) */
+static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
+fastcall void smp_thermal_interrupt(struct pt_regs *regs)      /* runs the installed handler between irq_enter() and irq_exit() */
+{
+        irq_enter();
+        vendor_thermal_interrupt(regs);
+        irq_exit();
+}
+/* P4/Xeon Thermal regulation detect and init
+ * Returns silently (no error code; see the -Exxx note on each return)
+ * when the CPU lacks X86_FEATURE_ACPI or X86_FEATURE_ACC, when
+ * MISC_ENABLE bit 3 is set and the thermal LVT entry has APIC_DM_SMI
+ * set, or when the LVT entry already carries a vector.  Otherwise it
+ * programs THERMAL_APIC_VECTOR (initially masked), sets the two low
+ * bits of MSR_IA32_THERM_INTERRUPT, installs intel_thermal_interrupt,
+ * sets MISC_ENABLE bit 3, unmasks the LVT entry and sets therm_throt_en.
+ */
+static void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+        u32 l, h;
+        unsigned int cpu = smp_processor_id();
+        /* Thermal monitoring */
+        if (!cpu_has(c, X86_FEATURE_ACPI))
+                return; /* -ENODEV */
+        /* Clock modulation */
+        if (!cpu_has(c, X86_FEATURE_ACC))
+                return; /* -ENODEV */
+        /* first check if its enabled already, in which case there might
+         * be some SMM goo which handles it, so we can't even put a handler
+         * since it might be delivered via SMI already -zwanem.
+         */
+        rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+        h = apic_read(APIC_LVTTHMR);    /* h is reused: from here on it holds the thermal LVT entry */
+        if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+                printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
+                                cpu);
+                return; /* -EBUSY */
+        }
+        /* check whether a vector already exists, temporarily masked? */        
+        if (h & APIC_VECTOR_MASK) {
+                printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
+                                "installed\n",
+                        cpu, (h & APIC_VECTOR_MASK));
+                return; /* -EBUSY */
+        }
+        /* The temperature transition interrupt handler setup */
+        h = THERMAL_APIC_VECTOR;                /* our delivery vector */
+        h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
+        apic_write_around(APIC_LVTTHMR, h);
+        rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
+        wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); /* set bits 0 and 1, keep everything else */
+        /* ok we're good to go... */
+        vendor_thermal_interrupt = intel_thermal_interrupt;
+        
+        rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+        wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);    /* same bit 3 that was tested above */
+        l = apic_read (APIC_LVTTHMR);
+        apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); /* unmask only after the handler is installed */
+        printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
+        /* enable thermal throttle processing */
+        atomic_set(&therm_throt_en, 1);
+        return;
+}
+#endif /* CONFIG_X86_MCE_P4THERMAL */
+/* P4/Xeon Extended MCE MSR retrieval.
+ * Copies the low 32 bits of each MSR_IA32_MCG_* register-image MSR into
+ * *r; the high 32 bits land in h and are discarded.  Nothing is returned
+ * and support is not probed here: the caller in this file only calls it
+ * when mce_num_extended_msrs > 0.
+ */
+static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
+{
+        u32 h;  /* scratch for the unused high halves */
+        rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
+        rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
+        rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
+        rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
+        rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
+        rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
+        rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
+        rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
+        rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
+        rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
+}
+static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
+{       /*
+         * P4/Xeon machine check handler.  Logs MCG_STATUS, the extended
+         * register-image MSRs (when mce_num_extended_msrs > 0) and every
+         * bank whose STATUS high word has bit 31 set.  Panics if a logged
+         * bank had bit 25 or bit 29 set or if MCG_STATUS bit 0 was clear;
+         * otherwise clears the logged banks, clears MCG_STATUS bit 2 and
+         * returns.  regs and error_code are unused.
+         */
+        int recover=1;  /* bit 0 set: "Unable to continue"; bit 1 set: "CPU context corrupt" */
+        u32 alow, ahigh, high, low;
+        u32 mcgstl, mcgsth;
+        int i;
+        rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+        if (mcgstl & (1<<0))    /* Recoverable ? */
+                recover=0;
+        printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+                smp_processor_id(), mcgsth, mcgstl);
+        if (mce_num_extended_msrs > 0) {
+                struct intel_mce_extended_msrs dbg;
+                intel_get_extended_msrs(&dbg);
+                printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
+                        smp_processor_id(), dbg.eip, dbg.eflags);
+                printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
+                        dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
+                printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
+                        dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
+        }
+        for (i=0; i<nr_mce_banks; i++) {        /* first pass: log only, nothing is cleared yet */
+                rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+                if (high & (1<<31)) {
+                        if (high & (1<<29))
+                                recover |= 1;
+                        if (high & (1<<25))
+                                recover |= 2;
+                        printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+                        high &= ~(1<<31);
+                        if (high & (1<<27)) {   /* also dump this bank's MISC register */
+                                rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+                                printk ("[%08x%08x]", ahigh, alow);
+                        }
+                        if (high & (1<<26)) {   /* also dump this bank's ADDR register */
+                                rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+                                printk (" at %08x%08x", ahigh, alow);
+                        }
+                        printk ("\n");
+                }
+        }
+        if (recover & 2)
+                panic ("CPU context corrupt");
+        if (recover & 1)
+                panic ("Unable to continue");
+        printk(KERN_EMERG "Attempting to continue.\n");
+        /* 
+         * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
+         * recoverable/continuable.This will allow BIOS to look at the MSRs
+         * for errors if the OS could not log the error.
+         */
+        for (i=0; i<nr_mce_banks; i++) {        /* second pass: reached only when no panic was taken */
+                u32 msr;
+                msr = MSR_IA32_MC0_STATUS+i*4;
+                rdmsr (msr, low, high);
+                if (high&(1<<31)) {
+                        /* Clear it */
+                        wrmsr(msr, 0UL, 0UL);
+                        /* Serialize */
+                        wmb();
+                        add_taint(TAINT_MACHINE_CHECK);
+                }
+        }
+        mcgstl &= ~(1<<2);      /* clear bit 2 of MCG_STATUS before writing it back */
+        wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
+{       /*
+         * Per-CPU P4/Xeon setup: installs intel_machine_check, enables
+         * MCG_CTL when MCG_CAP bit 8 is set, enables and clears every bank,
+         * sets X86_CR4_MCE, then records the extended-MSR count and (if
+         * configured) initialises thermal monitoring when MCG_CAP bit 9
+         * is set.  No feature flags of c are checked here.
+         */
+        u32 l, h;
+        int i;
+        
+        machine_check_vector = intel_machine_check;
+        wmb();  /* handler pointer is stored before any enable bit below is written */
+        printk (KERN_INFO "Intel machine check architecture supported.\n");
+        rdmsr (MSR_IA32_MCG_CAP, l, h);
+        if (l & (1<<8)) /* Control register present ? */
+                wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+        nr_mce_banks = l & 0xff;        /* low byte of MCG_CAP is the bank count */
+        for (i=0; i<nr_mce_banks; i++) {
+                wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+                wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+        }
+        set_in_cr4 (X86_CR4_MCE);
+        printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+                smp_processor_id());
+        /* Check for P4/Xeon extended MCE MSRs */
+        rdmsr (MSR_IA32_MCG_CAP, l, h);
+        if (l & (1<<9)) {/* MCG_EXT_P */
+                mce_num_extended_msrs = (l >> 16) & 0xff;
+                printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
+                                " available\n",
+                        smp_processor_id(), mce_num_extended_msrs);
+#ifdef CONFIG_X86_MCE_P4THERMAL
+                /* Check for P4/Xeon Thermal monitor */
+                intel_init_thermal(c);  /* note: only reached when MCG_CAP bit 9 is set */
+#endif
+        }
+}
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
new file mode 100644
index 000000000000..94bc43d950cf
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -0,0 +1,53 @@
+/*
+ * P5 specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Alan Cox <alan@redhat.com>
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+#include "mce.h"
+/*
+ * Machine check handler for Pentium class Intel
+ *
+ * Logs the low words of the P5 machine-check address and type MSRs, adds
+ * a thermal hint when bit 5 of the type word is set, and taints the
+ * kernel.  regs and error_code are unused.
+ */
+static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code)
+{
+        u32 loaddr, hi, lotype;
+        rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
+        rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
+        /* %08X zero-pads; the former %8X left blanks between "0x" and the digits */
+        printk(KERN_EMERG "CPU#%d: Machine Check Exception:  0x%08X (type 0x%08X).\n", smp_processor_id(), loaddr, lotype);
+        if (lotype & (1<<5))
+                printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
+        add_taint(TAINT_MACHINE_CHECK);
+}
+/*
+ * Set up machine check reporting for processors with Intel style MCE.
+ * P5 reporting is opt-in: nothing happens unless the CPU advertises
+ * X86_FEATURE_MCE and mce_disabled is -1 (the "mce" boot option).
+ */
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
+{
+        u32 lo, hi;
+        /* Default P5 to off as its often misconnected */
+        if (!cpu_has(c, X86_FEATURE_MCE) || mce_disabled != -1)
+                return;
+        machine_check_vector = pentium_machine_check;
+        wmb();
+        /* Read registers before enabling */
+        rdmsr(MSR_IA32_P5_MC_ADDR, lo, hi);
+        rdmsr(MSR_IA32_P5_MC_TYPE, lo, hi);
+        printk(KERN_INFO "Intel old style machine check architecture supported.\n");
+        /* Enable MCE */
+        set_in_cr4(X86_CR4_MCE);
+        printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
+}
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
new file mode 100644
index 000000000000..deeae42ce199
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -0,0 +1,119 @@
+/*
+ * P6 specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Alan Cox <alan@redhat.com>
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+#include "mce.h"
+/* Machine Check Handler For PII/PIII
+ *
+ * Logs MCG_STATUS and every bank whose STATUS high word has bit 31 set.
+ * Panics if a logged bank had bit 25 or bit 29 set or if MCG_STATUS
+ * bit 0 was clear; otherwise clears the logged banks, clears MCG_STATUS
+ * bit 2 and returns.  regs and error_code are unused.
+ */
+static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
+{
+        int recover=1;  /* bit 0 set: "Unable to continue"; bit 1 set: "CPU context corrupt" */
+        u32 alow, ahigh, high, low;
+        u32 mcgstl, mcgsth;
+        int i;
+        rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+        if (mcgstl & (1<<0))    /* Recoverable ? */
+                recover=0;
+        printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
+                smp_processor_id(), mcgsth, mcgstl);
+        for (i=0; i<nr_mce_banks; i++) {        /* first pass: log only, nothing is cleared yet */
+                rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
+                if (high & (1<<31)) {
+                        if (high & (1<<29))
+                                recover |= 1;
+                        if (high & (1<<25))
+                                recover |= 2;
+                        printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+                        high &= ~(1<<31);
+                        if (high & (1<<27)) {   /* also dump this bank's MISC register */
+                                rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+                                printk ("[%08x%08x]", ahigh, alow);
+                        }
+                        if (high & (1<<26)) {   /* also dump this bank's ADDR register */
+                                rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+                                printk (" at %08x%08x", ahigh, alow);
+                        }
+                        printk ("\n");
+                }
+        }
+        if (recover & 2)
+                panic ("CPU context corrupt");
+        if (recover & 1)
+                panic ("Unable to continue");
+        printk (KERN_EMERG "Attempting to continue.\n");
+        /* 
+         * Do not clear the MSR_IA32_MCi_STATUS if the error is not 
+         * recoverable/continuable.This will allow BIOS to look at the MSRs
+         * for errors if the OS could not log the error.
+         */
+        for (i=0; i<nr_mce_banks; i++) {        /* second pass: reached only when no panic was taken */
+                unsigned int msr;
+                msr = MSR_IA32_MC0_STATUS+i*4;
+                rdmsr (msr,low, high);
+                if (high & (1<<31)) {
+                        /* Clear it */
+                        wrmsr (msr, 0UL, 0UL);
+                        /* Serialize */
+                        wmb();
+                        add_taint(TAINT_MACHINE_CHECK);
+                }
+        }
+        mcgstl &= ~(1<<2);      /* clear bit 2 of MCG_STATUS before writing it back */
+        wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+/* Set up machine check reporting for processors with Intel style MCE
+ *
+ * Does nothing unless c has both X86_FEATURE_MCE and X86_FEATURE_MCA.
+ * Otherwise installs intel_machine_check, enables MCG_CTL when MCG_CAP
+ * bit 8 is set, enables banks 1..nr_mce_banks-1, clears the status of
+ * every bank including bank 0, and sets X86_CR4_MCE.
+ */
+void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
+{
+        u32 l, h;
+        int i;
+        
+        /* Check for MCE support */
+        if (!cpu_has(c, X86_FEATURE_MCE))
+                return;
+        /* Check for PPro style MCA */
+        if (!cpu_has(c, X86_FEATURE_MCA))
+                return;
+        /* Ok machine check is available */
+        machine_check_vector = intel_machine_check;
+        wmb();  /* handler pointer is stored before any enable bit below is written */
+        printk (KERN_INFO "Intel machine check architecture supported.\n");
+        rdmsr (MSR_IA32_MCG_CAP, l, h);
+        if (l & (1<<8)) /* Control register present ? */
+                wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+        nr_mce_banks = l & 0xff;        /* low byte of MCG_CAP is the bank count */
+        /*
+         * Following the example in IA-32 SDM Vol 3:
+         * - MC0_CTL should not be written
+         * - Status registers on all banks should be cleared on reset
+         */
+        for (i=1; i<nr_mce_banks; i++)  /* CTL: bank 0 skipped */
+                wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+        for (i=0; i<nr_mce_banks; i++)  /* STATUS: every bank */
+                wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+        set_in_cr4 (X86_CR4_MCE);
+        printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+                smp_processor_id());
+}
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
new file mode 100644
index 000000000000..1203dc5ab87a
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -0,0 +1,186 @@
+/*
+ * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c
+ *
+ * Thermal throttle event support code (such as syslog messaging and rate
+ * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
+ * This allows consistent reporting of CPU thermal throttle events.
+ *
+ * Maintains a counter in /sys that keeps track of the number of thermal
+ * events, such that the user knows how bad the thermal problem might be
+ * (since the logging to syslog and mcelog is rate limited).
+ *
+ * Author: Dmitriy Zavin (dmitriyz@google.com)
+ *
+ * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
+ *          Inspired by Ross Biro's and Al Borchers' counter code.
+ */
+#include <linux/percpu.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <asm/cpu.h>
+#include <linux/notifier.h>
+#include <linux/jiffies.h>
+#include <asm/therm_throt.h>
+/* How long to wait between reporting thermal events
+ * (in jiffies; added to the 64-bit jiffies value in therm_throt_process) */
+#define CHECK_INTERVAL              (300 * HZ)
+static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;    /* earliest 64-bit jiffies value at which this CPU logs again */
+static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);  /* incremented on every call with curr != 0; shown via sysfs */
+atomic_t therm_throt_en = ATOMIC_INIT(0);       /* set to 1 by intel_init_thermal() in p4.c; gates the sysfs setup below */
+#ifdef CONFIG_SYSFS     /* sysfs plumbing: one read-only "count" attribute placed in
+                         * an attribute group named "thermal_throttle".  The two
+                         * helper macros generate the show function and the
+                         * attribute object for a given per-cpu counter name. */
+#define define_therm_throt_sysdev_one_ro(_name)                              \
+        static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
+#define define_therm_throt_sysdev_show_func(name)                            \
+static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,        \
+                                              char *buf)                     \
+{                                                                            \
+        unsigned int cpu = dev->id;                                          \
+        ssize_t ret;                                                         \
+                                                                             \
+        preempt_disable();              /* CPU hotplug */                    \
+        if (cpu_online(cpu))                                                 \
+                ret = sprintf(buf, "%lu\n",                                  \
+                              per_cpu(thermal_throttle_##name, cpu));        \
+        else                                                                 \
+                ret = 0;                                                     \
+        preempt_enable();                                                    \
+                                                                             \
+        return ret;                                                          \
+}
+define_therm_throt_sysdev_show_func(count);     /* emits therm_throt_sysdev_show_count(); prints nothing for an offline CPU */
+define_therm_throt_sysdev_one_ro(count);        /* mode 0444; presumably SYSDEV_ATTR defines attr_count, which is used just below */
+static struct attribute *thermal_throttle_attrs[] = {
+        &attr_count.attr,
+        NULL
+};
+static struct attribute_group thermal_throttle_attr_group = {
+        .attrs = thermal_throttle_attrs,
+        .name = "thermal_throttle"
+};
+#endif /* CONFIG_SYSFS */
+/**
+ * therm_throt_process - Process thermal throttling event from interrupt
+ * @curr: Non-zero while the thermal condition is current, zero once it
+ *        has ended (the thermal interrupt normally fires for both the
+ *        start and the end of an event).
+ *
+ * Called by the thermal interrupt after the IRQ has been acknowledged.
+ * Counts every event with @curr set and rate-limits the syslog output to
+ * one message per CHECK_INTERVAL per CPU.
+ *
+ * Returns: 0 : Still inside the quiet period that followed the previous
+ *              message; nothing was printed and the event should not be
+ *              logged further.
+ *          1 : A message was printed to the syslog and the event should
+ *              be logged further.
+ */
+int therm_throt_process(int curr)
+{
+        unsigned int this_cpu = smp_processor_id();
+        __u64 now = get_jiffies_64();
+        if (curr)
+                __get_cpu_var(thermal_throttle_count)++;
+        /* quiet period from the previous message has not expired yet */
+        if (time_before64(now, __get_cpu_var(next_check)))
+                return 0;
+        __get_cpu_var(next_check) = now + CHECK_INTERVAL;
+        if (!curr) {
+                /* the thermal event has ended */
+                printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", this_cpu);
+                return 1;
+        }
+        /* we just entered the thermal event */
+        printk(KERN_CRIT "CPU%d: Temperature above threshold, "
+               "cpu clock throttled (total events = %lu)\n", this_cpu,
+               __get_cpu_var(thermal_throttle_count));
+        add_taint(TAINT_MACHINE_CHECK);
+        return 1;
+}
+#ifdef CONFIG_SYSFS
+/* Add/Remove thermal_throttle interface for CPU device */
+/* Create the "thermal_throttle" attribute group under sys_dev; returns the
+ * result of sysfs_create_group(). */
+static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
+{
+        return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+}
+/* Remove the "thermal_throttle" attribute group from sys_dev.  This is a
+ * void function, so the call is a plain statement: ISO C does not allow
+ * "return <expression>;" in a function returning void. */
+static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
+{
+        sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+}
+/* Mutex protecting device creation against CPU hotplug */
+static DEFINE_MUTEX(therm_cpu_lock);
+/*
+ * CPU hotplug notifier.  When a CPU comes online its thermal_throttle
+ * sysfs group is created (a failure only triggers WARN_ON); when a CPU
+ * is dead the group is removed.  Both happen under therm_cpu_lock.  All
+ * other actions are ignored.  Always returns NOTIFY_OK.
+ */
+static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
+                                                   unsigned long action,
+                                                   void *hcpu)
+{
+        unsigned int cpu = (unsigned long)hcpu;
+        struct sys_device *cpu_dev = get_cpu_sysdev(cpu);
+        int rc;
+        if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
+                mutex_lock(&therm_cpu_lock);
+                rc = thermal_throttle_add_dev(cpu_dev);
+                mutex_unlock(&therm_cpu_lock);
+                WARN_ON(rc);
+        } else if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+                mutex_lock(&therm_cpu_lock);
+                thermal_throttle_remove_dev(cpu_dev);
+                mutex_unlock(&therm_cpu_lock);
+        }
+        return NOTIFY_OK;
+}
+/* Notifier block handed to register_hotcpu_notifier() at init time. */
+static struct notifier_block thermal_throttle_cpu_notifier = {
+        .notifier_call = thermal_throttle_cpu_callback,
+};
+/*
+ * Boot-time sysfs setup, run as a device initcall.  Does nothing unless
+ * therm_throt_en has been set.  Otherwise registers the hotplug notifier
+ * and creates the thermal_throttle group for every CPU already online
+ * (a failure only triggers WARN_ON).  Always returns 0.
+ */
+static __init int thermal_throttle_init_device(void)
+{
+        unsigned int cpu = 0;
+        int rc;
+        if (atomic_read(&therm_throt_en) == 0)
+                return 0;
+        register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
+#ifdef CONFIG_HOTPLUG_CPU
+        mutex_lock(&therm_cpu_lock);
+#endif
+        /* connect live CPUs to sysfs */
+        for_each_online_cpu(cpu) {
+                rc = thermal_throttle_add_dev(get_cpu_sysdev(cpu));
+                WARN_ON(rc);
+        }
+#ifdef CONFIG_HOTPLUG_CPU
+        mutex_unlock(&therm_cpu_lock);
+#endif
+        return 0;
+}
+device_initcall(thermal_throttle_init_device);
+#endif /* CONFIG_SYSFS */
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
new file mode 100644
index 000000000000..9e424b6c293d
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -0,0 +1,36 @@
+/*
+ * IDT Winchip specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Alan Cox <alan@redhat.com>
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <asm/processor.h> 
+#include <asm/system.h>
+#include <asm/msr.h>
+#include "mce.h"
+/* Machine check handler for WinChip C6
+ * Logs a fixed message (the CPU number is hard-coded as 0, no registers
+ * are read) and taints the kernel.  regs and error_code are unused.
+ */
+static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code)
+{
+        printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
+        add_taint(TAINT_MACHINE_CHECK);
+}
+/* Set up machine check reporting on the Winchip C6 series
+ * Installs winchip_machine_check, sets bit 2 and clears bit 4 in the
+ * low word of MSR_IDT_FCR1 (high word written back unchanged), then
+ * sets X86_CR4_MCE.  No feature flags of c are checked here.
+ */
+void winchip_mcheck_init(struct cpuinfo_x86 *c)
+{
+        u32 lo, hi;
+        machine_check_vector = winchip_machine_check;
+        wmb();  /* handler pointer is stored before the enable bits below are written */
+        rdmsr(MSR_IDT_FCR1, lo, hi);
+        lo|= (1<<2);    /* Enable EIERRINT (int 18 MCE) */
+        lo&= ~(1<<4);   /* Enable MCE */
+        wrmsr(MSR_IDT_FCR1, lo, hi);
+        set_in_cr4(X86_CR4_MCE);
+        printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
+}