 39 files changed, 2978 insertions(+), 1668 deletions(-)
diff --git a/Documentation/Changes b/Documentation/Changes
index b95082be4d5e..d21b3b5aa543 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -48,6 +48,7 @@ o procps 3.2.0 # ps --version
 o oprofile 0.9 # oprofiled --version
 o udev 081 # udevinfo -V
 o grub 0.93 # grub --version
+o mcelog 0.6
 
 Kernel compilation
 ==================
@@ -276,6 +277,16 @@ before running exportfs or mountd. It is recommended that all NFS
 services be protected from the internet-at-large by a firewall where
 that is possible.
 
+mcelog
+------
+
+In Linux 2.6.31+ the i386 kernel needs to run the mcelog utility
+as a regular cronjob, similar to the x86-64 kernel, to process and log
+machine check events when CONFIG_X86_NEW_MCE is enabled. Machine check
+events are errors reported by the CPU. Processing them is strongly encouraged.
+All x86-64 kernels since 2.6.4 require the mcelog utility to
+process machine checks.
+
 Getting updated software
 ========================
 
@@ -365,6 +376,10 @@ FUSE
 ----
 o <http://sourceforge.net/projects/fuse>
 
+mcelog
+------
+o <ftp://ftp.kernel.org/pub/linux/utils/cpu/mce/mcelog/>
+
 Networking
 **********
 
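
mcelog drains the kernel's machine check buffer (/dev/mcelog) and decodes the
binary records. As a sketch of the cron setup the text above asks for -- the
path, interval and log destination here are assumptions, not part of this
patch:

    # assumed /etc/cron.d entry; adjust paths for your distribution
    */5 * * * * root /usr/sbin/mcelog >> /var/log/mcelog
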
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index de491a3e2313..ec9ef5d0d7b3 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -437,3 +437,13 @@ Why: Superseded by tdfxfb. I2C/DDC support used to live in a separate
 	driver but this caused driver conflicts.
 Who:	Jean Delvare <khali@linux-fr.org>
 	Krzysztof Helt <krzysztof.h1@wp.pl>
+
+----------------------------
+
+What:	CONFIG_X86_OLD_MCE
+When:	2.6.32
+Why:	Remove the old legacy 32bit machine check code. This has been
+	superseded by the newer machine check code from the 64bit port,
+	but the old version has been kept around for easier testing. Note this
+	doesn't impact the old P5 and WinChip machine check handlers.
+Who:	Andi Kleen <andi@firstfloor.org>
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 2db5893d6c97..29a6ff8bc7d3 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -5,21 +5,51 @@ only the AMD64 specific ones are listed here.
 
 Machine check
 
-   mce=off disable machine check
-   mce=bootlog Enable logging of machine checks left over from booting.
-		Disabled by default on AMD because some BIOS leave bogus ones.
-		If your BIOS doesn't do that it's a good idea to enable though
-		to make sure you log even machine check events that result
-		in a reboot. On Intel systems it is enabled by default.
+   Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
+
+   mce=off
+		Disable machine check
+   mce=no_cmci
+		Disable CMCI (Corrected Machine Check Interrupt), which
+		Intel processors support. Disabling it is usually not
+		recommended, but it might be handy if your hardware
+		is misbehaving.
+		Note that you'll get more problems without CMCI than with
+		it because of the shared banks, i.e. you might get duplicated
+		error logs.
+   mce=dont_log_ce
+		Don't log corrected errors. All events reported as
+		corrected are silently cleared by the OS.
+		This option is useful if you have no interest in
+		corrected errors.
+   mce=ignore_ce
+		Disable features for corrected errors, e.g. the polling timer
+		and CMCI. Events reported as corrected are not cleared
+		by the OS and remain in their error banks.
+		Usually this is not recommended, but if there is an agent
+		checking/clearing corrected errors (e.g. the BIOS or a
+		hardware monitoring application) that conflicts with the
+		OS's error handling, and you cannot deactivate the agent,
+		then this option can help.
+   mce=bootlog
+		Enable logging of machine checks left over from booting.
+		Disabled by default on AMD because some BIOS leave bogus ones.
+		If your BIOS doesn't do that, it's a good idea to enable it
+		anyway to make sure you log even machine check events that
+		result in a reboot. On Intel systems it is enabled by default.
    mce=nobootlog
 		Disable boot machine check logging.
-   mce=tolerancelevel (number)
+   mce=tolerancelevel[,monarchtimeout] (number,number)
+	tolerance levels:
 		0: always panic on uncorrected errors, log corrected errors
 		1: panic or SIGBUS on uncorrected errors, log corrected errors
 		2: SIGBUS or log uncorrected errors, log corrected errors
 		3: never panic or SIGBUS, log all errors (for testing only)
 		Default is 1
 		Can be also set using sysfs which is preferable.
+	monarchtimeout:
+		Sets the time in us to wait for other CPUs on machine checks. 0
+		to disable.
 
 nomce (for compatibility with i386): same as mce=off
 
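
As an example of the extended tolerancelevel syntax above, booting with
(hypothetical values):

    mce=2,500

sets tolerant to 2 and makes CPUs wait 500 us for each other when a machine
check is raised.
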
diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck
index a05e58e7b159..b1fb30273286 100644
--- a/Documentation/x86/x86_64/machinecheck
+++ b/Documentation/x86/x86_64/machinecheck
@@ -41,7 +41,9 @@ check_interval
 	the polling interval. When the poller stops finding MCEs, it
 	triggers an exponential backoff (poll less often) on the polling
 	interval. The check_interval variable is both the initial and
-	maximum polling interval.
+	maximum polling interval. 0 means no polling for corrected machine
+	check errors (but some corrected errors might still be reported
+	in other ways).
 
 tolerant
 	Tolerance level. When a machine check exception occurs for a non
@@ -67,6 +69,10 @@ trigger
 	Program to run when a machine check event is detected.
 	This is an alternative to running mcelog regularly from cron
 	and allows to detect events faster.
+monarch_timeout
+	How long to wait for other CPUs to also machine check on an
+	exception. 0 to disable waiting for other CPUs.
+	Unit: us
 
 TBD document entries for AMD threshold interrupt configuration
 
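
monarch_timeout, like the other entries in this file, is exposed per CPU
through sysfs. A sketch of tuning it at runtime, assuming the usual
/sys/devices/system/machinecheck layout this document describes:

    echo 100 > /sys/devices/system/machinecheck/machinecheck0/monarch_timeout
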
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 68f5578fe38e..356d2ec8e2fb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -789,10 +789,26 @@ config X86_MCE
 	  to disable it. MCE support simply ignores non-MCE processors like
 	  the 386 and 486, so nearly everyone can say Y here.
 
+config X86_OLD_MCE
+	depends on X86_32 && X86_MCE
+	bool "Use legacy machine check code (will go away)"
+	default n
+	select X86_ANCIENT_MCE
+	---help---
+	  Use the old i386 machine check code. This is merely intended for
+	  testing in a transition period. Try this if you run into any machine
+	  check related software problems, but report the problem to
+	  linux-kernel. When in doubt say no.
+
+config X86_NEW_MCE
+	depends on X86_MCE
+	bool
+	default y if (!X86_OLD_MCE && X86_32) || X86_64
+
 config X86_MCE_INTEL
 	def_bool y
 	prompt "Intel MCE features"
-	depends on X86_64 && X86_MCE && X86_LOCAL_APIC
+	depends on X86_NEW_MCE && X86_LOCAL_APIC
 	---help---
 	  Additional support for intel specific MCE features such as
 	  the thermal monitor.
@@ -800,19 +816,36 @@ config X86_MCE_INTEL
 config X86_MCE_AMD
 	def_bool y
 	prompt "AMD MCE features"
-	depends on X86_64 && X86_MCE && X86_LOCAL_APIC
+	depends on X86_NEW_MCE && X86_LOCAL_APIC
 	---help---
 	  Additional support for AMD specific MCE features such as
 	  the DRAM Error Threshold.
 
+config X86_ANCIENT_MCE
+	def_bool n
+	depends on X86_32
+	prompt "Support for old Pentium 5 / WinChip machine checks"
+	---help---
+	  Include support for machine check handling on old Pentium 5 or WinChip
+	  systems. These typically need to be enabled explicitly on the command
+	  line.
+
 config X86_MCE_THRESHOLD
 	depends on X86_MCE_AMD || X86_MCE_INTEL
 	bool
 	default y
 
+config X86_MCE_INJECT
+	depends on X86_NEW_MCE
+	tristate "Machine check injector support"
+	---help---
+	  Provide support for injecting machine checks for testing purposes.
+	  If you don't know what a machine check is and you don't do kernel
+	  QA it is safe to say n.
+
 config X86_MCE_NONFATAL
 	tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
-	depends on X86_32 && X86_MCE
+	depends on X86_OLD_MCE
 	---help---
 	  Enabling this feature starts a timer that triggers every 5 seconds which
 	  will look at the machine check registers to see if anything happened.
@@ -825,11 +858,15 @@ config X86_MCE_NONFATAL
 
 config X86_MCE_P4THERMAL
 	bool "check for P4 thermal throttling interrupt."
-	depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP)
+	depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP)
 	---help---
 	  Enabling this feature will cause a message to be printed when the P4
 	  enters thermal throttling.
 
+config X86_THERMAL_VECTOR
+	def_bool y
+	depends on X86_MCE_P4THERMAL || X86_MCE_INTEL
+
 config VM86
 	bool "Enable VM86 support" if EMBEDDED
 	default y
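
The net effect of the new symbols: X86_NEW_MCE is always active on 64-bit,
and active on 32-bit unless the legacy option is selected. A 32-bit .config
on the unified code path would therefore end up with something like:

    CONFIG_X86_MCE=y
    CONFIG_X86_NEW_MCE=y
    # CONFIG_X86_OLD_MCE is not set
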
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index d750a10ccad6..ff8cbfa07851 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -14,6 +14,7 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
+BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
 
 BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
 		 smp_invalidate_interrupt)
@@ -52,8 +53,16 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
 #endif
 
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_THERMAL_VECTOR
 BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 #endif
 
+#ifdef CONFIG_X86_MCE_THRESHOLD
+BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
+#endif
+
+#ifdef CONFIG_X86_NEW_MCE
+BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
+#endif
+
 #endif
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 9ebc5c255032..82e3e8f01043 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,7 +22,7 @@ typedef struct {
 #endif
 #ifdef CONFIG_X86_MCE
 	unsigned int irq_thermal_count;
-# ifdef CONFIG_X86_64
+# ifdef CONFIG_X86_MCE_THRESHOLD
 	unsigned int irq_threshold_count;
 # endif
 #endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 6df45f639666..ba180d93b08c 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -34,6 +34,7 @@ extern void perf_pending_interrupt(void);
 extern void spurious_interrupt(void);
 extern void thermal_interrupt(void);
 extern void reschedule_interrupt(void);
+extern void mce_self_interrupt(void);
 
 extern void invalidate_interrupt(void);
 extern void invalidate_interrupt0(void);
@@ -46,6 +47,7 @@ extern void invalidate_interrupt6(void);
 extern void invalidate_interrupt7(void);
 
 extern void irq_move_cleanup_interrupt(void);
+extern void reboot_interrupt(void);
 extern void threshold_interrupt(void);
 
 extern void call_function_interrupt(void);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index e997be98c9b9..5b21f0ec3df2 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -25,6 +25,7 @@
  */
 
 #define NMI_VECTOR			0x02
+#define MCE_VECTOR			0x12
 
 /*
  * IDT vectors usable for external interrupt sources start
@@ -87,13 +88,8 @@
 #define CALL_FUNCTION_VECTOR		0xfc
 #define CALL_FUNCTION_SINGLE_VECTOR	0xfb
 #define THERMAL_APIC_VECTOR		0xfa
-
-#ifdef CONFIG_X86_32
-/* 0xf8 - 0xf9 : free */
-#else
-# define THRESHOLD_APIC_VECTOR		0xf9
-# define UV_BAU_MESSAGE			0xf8
-#endif
+#define THRESHOLD_APIC_VECTOR		0xf9
+#define REBOOT_VECTOR			0xf8
 
 /* f0-f7 used for spreading out TLB flushes: */
 #define INVALIDATE_TLB_VECTOR_END	0xf7
@@ -117,6 +113,13 @@
  */
 #define LOCAL_PENDING_VECTOR		0xec
 
+#define UV_BAU_MESSAGE			0xec
+
+/*
+ * Self IPI vector for machine checks
+ */
+#define MCE_SELF_VECTOR			0xeb
+
 /*
  * First APIC vector available to drivers: (vectors 0x30-0xee) we
  * start at 0x31(0x41) to spread out vectors evenly between priority
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4f8c199584e7..540a466e50f5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -1,8 +1,6 @@
 #ifndef _ASM_X86_MCE_H
 #define _ASM_X86_MCE_H
 
-#ifdef __x86_64__
-
 #include <linux/types.h>
 #include <asm/ioctls.h>
 
@@ -10,21 +8,35 @@
  * Machine Check support for x86
  */
 
-#define MCG_CTL_P	 (1UL<<8)   /* MCG_CAP register available */
-#define MCG_EXT_P	 (1ULL<<9)  /* Extended registers available */
-#define MCG_CMCI_P	 (1ULL<<10) /* CMCI supported */
-
-#define MCG_STATUS_RIPV  (1UL<<0)   /* restart ip valid */
-#define MCG_STATUS_EIPV  (1UL<<1)   /* ip points to correct instruction */
-#define MCG_STATUS_MCIP  (1UL<<2)   /* machine check in progress */
-
-#define MCI_STATUS_VAL   (1UL<<63)  /* valid error */
-#define MCI_STATUS_OVER  (1UL<<62)  /* previous errors lost */
-#define MCI_STATUS_UC    (1UL<<61)  /* uncorrected error */
-#define MCI_STATUS_EN    (1UL<<60)  /* error enabled */
-#define MCI_STATUS_MISCV (1UL<<59)  /* misc error reg. valid */
-#define MCI_STATUS_ADDRV (1UL<<58)  /* addr reg. valid */
-#define MCI_STATUS_PCC   (1UL<<57)  /* processor context corrupt */
+#define MCG_BANKCNT_MASK	0xff	   /* Number of Banks */
+#define MCG_CTL_P		(1ULL<<8)  /* MCG_CAP register available */
+#define MCG_EXT_P		(1ULL<<9)  /* Extended registers available */
+#define MCG_CMCI_P		(1ULL<<10) /* CMCI supported */
+#define MCG_EXT_CNT_MASK	0xff0000   /* Number of Extended registers */
+#define MCG_EXT_CNT_SHIFT	16
+#define MCG_EXT_CNT(c)		(((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
+#define MCG_SER_P		(1ULL<<24) /* MCA recovery/new status bits */
+
+#define MCG_STATUS_RIPV  (1ULL<<0)   /* restart ip valid */
+#define MCG_STATUS_EIPV  (1ULL<<1)   /* ip points to correct instruction */
+#define MCG_STATUS_MCIP  (1ULL<<2)   /* machine check in progress */
+
+#define MCI_STATUS_VAL   (1ULL<<63)  /* valid error */
+#define MCI_STATUS_OVER  (1ULL<<62)  /* previous errors lost */
+#define MCI_STATUS_UC    (1ULL<<61)  /* uncorrected error */
+#define MCI_STATUS_EN    (1ULL<<60)  /* error enabled */
+#define MCI_STATUS_MISCV (1ULL<<59)  /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL<<58)  /* addr reg. valid */
+#define MCI_STATUS_PCC   (1ULL<<57)  /* processor context corrupt */
+#define MCI_STATUS_S	 (1ULL<<56)  /* Signaled machine check */
+#define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */
+
+/* MISC register defines */
+#define MCM_ADDR_SEGOFF  0	/* segment offset */
+#define MCM_ADDR_LINEAR  1	/* linear address */
+#define MCM_ADDR_PHYS	 2	/* physical address */
+#define MCM_ADDR_MEM	 3	/* memory address */
+#define MCM_ADDR_GENERIC 7	/* generic */
 
 /* Fields are zero when not available */
 struct mce {
@@ -34,13 +46,19 @@ struct mce {
 	__u64 mcgstatus;
 	__u64 ip;
 	__u64 tsc;	/* cpu time stamp counter */
-	__u64 res1;	/* for future extension */
-	__u64 res2;	/* dito. */
+	__u64 time;	/* wall time_t when error was detected */
+	__u8  cpuvendor;	/* cpu vendor as encoded in system.h */
+	__u8  pad1;
+	__u16 pad2;
+	__u32 cpuid;	/* CPUID 1 EAX */
 	__u8  cs;	/* code segment */
 	__u8  bank;	/* machine check bank */
-	__u8  cpu;	/* cpu that raised the error */
+	__u8  cpu;	/* cpu number; obsolete; use extcpu now */
 	__u8  finished;	/* entry is valid */
-	__u32 pad;
+	__u32 extcpu;	/* linux cpu number that detected the error */
+	__u32 socketid;	/* CPU socket ID */
+	__u32 apicid;	/* CPU initial apic ID */
+	__u64 mcgcap;	/* MCGCAP MSR: machine check capabilities of CPU */
 };
 
 /*
@@ -57,7 +75,7 @@ struct mce_log {
 	unsigned len;	/* = MCE_LOG_LEN */
 	unsigned next;
 	unsigned flags;
-	unsigned pad0;
+	unsigned recordlen;	/* length of struct mce */
 	struct mce entry[MCE_LOG_LEN];
 };
 
@@ -82,19 +100,16 @@ struct mce_log {
 #define K8_MCE_THRESHOLD_BANK_5    (MCE_THRESHOLD_BASE + 5 * 9)
 #define K8_MCE_THRESHOLD_DRAM_ECC  (MCE_THRESHOLD_BANK_4 + 0)
 
-#endif /* __x86_64__ */
-
 #ifdef __KERNEL__
 
-#ifdef CONFIG_X86_32
 extern int mce_disabled;
-#else /* CONFIG_X86_32 */
 
 #include <asm/atomic.h>
+#include <linux/percpu.h>
 
 void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
-DECLARE_PER_CPU(struct sys_device, device_mce);
+DECLARE_PER_CPU(struct sys_device, mce_dev);
 extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
@@ -104,6 +119,8 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
 
 #ifdef CONFIG_X86_MCE_INTEL
+extern int mce_cmci_disabled;
+extern int mce_ignore_ce;
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
 void cmci_clear(void);
 void cmci_reenable(void);
@@ -123,13 +140,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c);
 static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
 #endif
 
-extern int mce_available(struct cpuinfo_x86 *c);
+int mce_available(struct cpuinfo_x86 *c);
+
+DECLARE_PER_CPU(unsigned, mce_exception_count);
+DECLARE_PER_CPU(unsigned, mce_poll_count);
 
 void mce_log_therm_throt_event(__u64 status);
 
 extern atomic_t mce_entry;
 
-extern void do_machine_check(struct pt_regs *, long);
+void do_machine_check(struct pt_regs *, long);
 
 typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
@@ -139,14 +159,16 @@ enum mcp_flags {
 	MCP_UC = (1 << 1),		/* log uncorrected errors */
 	MCP_DONTLOG = (1 << 2),		/* only clear, don't log */
 };
-extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
-extern int mce_notify_user(void);
+int mce_notify_irq(void);
+void mce_notify_process(void);
 
-#endif /* !CONFIG_X86_32 */
+DECLARE_PER_CPU(struct mce, injectm);
+extern struct file_operations mce_chrdev_ops;
 
 #ifdef CONFIG_X86_MCE
-extern void mcheck_init(struct cpuinfo_x86 *c);
+void mcheck_init(struct cpuinfo_x86 *c);
 #else
 #define mcheck_init(c) do { } while (0)
 #endif
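
struct mce gains self-describing fields and mce_log's pad0 becomes recordlen,
so userspace can keep stepping through records if the structure grows again.
A minimal userspace sketch of draining /dev/mcelog -- hedged: it assumes the
driver's full-buffer read semantics (not shown in this excerpt) and uses
MCE_LOG_LEN from this header; error handling is trimmed:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <asm/mce.h>    /* struct mce, MCE_LOG_LEN */

    int main(void)
    {
        struct mce buf[MCE_LOG_LEN];    /* the driver only does full reads */
        int fd = open("/dev/mcelog", O_RDONLY);
        ssize_t n;
        unsigned i;

        if (fd < 0)
            return 1;
        n = read(fd, buf, sizeof(buf)); /* returns whole struct mce records */
        for (i = 0; n > 0 && i < n / sizeof(struct mce); i++)
            printf("CPU %u bank %u status %llx addr %llx\n",
                   buf[i].extcpu, (unsigned)buf[i].bank,
                   (unsigned long long)buf[i].status,
                   (unsigned long long)buf[i].addr);
        close(fd);
        return 0;
    }
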
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4d58d04fca83..1692fb5050e3 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -207,7 +207,14 @@
 
 #define MSR_IA32_THERM_CONTROL		0x0000019a
 #define MSR_IA32_THERM_INTERRUPT	0x0000019b
+
+#define THERM_INT_LOW_ENABLE		(1 << 0)
+#define THERM_INT_HIGH_ENABLE		(1 << 1)
+
 #define MSR_IA32_THERM_STATUS		0x0000019c
+
+#define THERM_STATUS_PROCHOT		(1 << 0)
+
 #define MSR_IA32_MISC_ENABLE		0x000001a0
 
 /* MISC_ENABLE bits: architectural */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 076d3881f3da..8c7c042ecad1 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -899,7 +899,7 @@ void clear_local_APIC(void)
 	}
 
 	/* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	if (maxlvt >= 5) {
 		v = apic_read(APIC_LVTTHMR);
 		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
@@ -2017,7 +2017,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
 	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
 	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	if (maxlvt >= 5)
 		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
 #endif
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index a691302dc3ff..b3025b43b63a 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu)
 
 static inline int mce_in_progress(void)
 {
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+#if defined(CONFIG_X86_NEW_MCE)
 	return atomic_read(&mce_entry) > 0;
 #endif
 	return 0;
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index b2f89829bbe8..45004faf67ea 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,7 +1,11 @@
-obj-y				= mce_$(BITS).o therm_throt.o
+obj-y				= mce.o therm_throt.o
 
-obj-$(CONFIG_X86_32)		+= k7.o p4.o p5.o p6.o winchip.o
-obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel_64.o
+obj-$(CONFIG_X86_NEW_MCE)	+= mce-severity.o
+obj-$(CONFIG_X86_OLD_MCE)	+= k7.o p4.o p6.o
+obj-$(CONFIG_X86_ANCIENT_MCE)	+= winchip.o p5.o
+obj-$(CONFIG_X86_MCE_P4THERMAL)	+= mce_intel.o
+obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel_64.o mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd_64.o
 obj-$(CONFIG_X86_MCE_NONFATAL)	+= non-fatal.o
 obj-$(CONFIG_X86_MCE_THRESHOLD)	+= threshold.o
+obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index dd3af6e7b39a..89e510424152 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -2,11 +2,10 @@
  * Athlon specific Machine Check Exception Reporting
  * (C) Copyright 2002 Dave Jones <davej@redhat.com>
  */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/smp.h>
 
 #include <asm/processor.h>
@@ -15,12 +14,12 @@
 
 #include "mce.h"
 
-/* Machine Check Handler For AMD Athlon/Duron */
+/* Machine Check Handler For AMD Athlon/Duron: */
 static void k7_machine_check(struct pt_regs *regs, long error_code)
 {
-	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
+	int recover = 1;
 	int i;
 
 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -32,15 +31,19 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
 
 	for (i = 1; i < nr_mce_banks; i++) {
 		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
-		if (high&(1<<31)) {
+		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
-			misc[0] = addr[0] = '\0';
+
+			misc[0] = '\0';
+			addr[0] = '\0';
+
 			if (high & (1<<29))
 				recover |= 1;
 			if (high & (1<<25))
 				recover |= 2;
 			high &= ~(1<<31);
+
 			if (high & (1<<27)) {
 				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
 				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
@@ -49,27 +52,31 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
 				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
 				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
 			}
+
 			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
 				smp_processor_id(), i, high, low, misc, addr);
-			/* Clear it */
+
+			/* Clear it: */
 			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-			/* Serialize */
+			/* Serialize: */
 			wmb();
 			add_taint(TAINT_MACHINE_CHECK);
 		}
 	}
 
-	if (recover&2)
+	if (recover & 2)
 		panic("CPU context corrupt");
-	if (recover&1)
+	if (recover & 1)
 		panic("Unable to continue");
+
 	printk(KERN_EMERG "Attempting to continue.\n");
+
 	mcgstl &= ~(1<<2);
 	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }
 
 
-/* AMD K7 machine check is Intel like */
+/* AMD K7 machine check is Intel like: */
 void amd_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -79,21 +86,26 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
 		return;
 
 	machine_check_vector = k7_machine_check;
+	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
 
 	printk(KERN_INFO "Intel machine check architecture supported.\n");
+
 	rdmsr(MSR_IA32_MCG_CAP, l, h);
 	if (l & (1<<8))	/* Control register present ? */
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 	nr_mce_banks = l & 0xff;
 
-	/* Clear status for MC index 0 separately, we don't touch CTL,
-	 * as some K7 Athlons cause spurious MCEs when its enabled. */
+	/*
+	 * Clear status for MC index 0 separately, we don't touch CTL,
+	 * as some K7 Athlons cause spurious MCEs when its enabled:
+	 */
 	if (boot_cpu_data.x86 == 6) {
 		wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
 		i = 1;
 	} else
 		i = 0;
+
 	for (; i < nr_mce_banks; i++) {
 		wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
 		wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
new file mode 100644
index 000000000000..a3a235a53f09
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -0,0 +1,127 @@
+/*
+ * Machine check injection support.
+ * Copyright 2008 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Authors:
+ * Andi Kleen
+ * Ying Huang
+ */
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <asm/mce.h>
+
+/* Update fake mce registers on current CPU. */
+static void inject_mce(struct mce *m)
+{
+	struct mce *i = &per_cpu(injectm, m->extcpu);
+
+	/* Make sure no one reads a partially written injectm */
+	i->finished = 0;
+	mb();
+	m->finished = 0;
+	/* First set the fields after finished */
+	i->extcpu = m->extcpu;
+	mb();
+	/* Now write record in order, finished last (except above) */
+	memcpy(i, m, sizeof(struct mce));
+	/* Finally activate it */
+	mb();
+	i->finished = 1;
+}
+
+struct delayed_mce {
+	struct timer_list timer;
+	struct mce m;
+};
+
+/* Inject mce on current CPU */
+static void raise_mce(unsigned long data)
+{
+	struct delayed_mce *dm = (struct delayed_mce *)data;
+	struct mce *m = &dm->m;
+	int cpu = m->extcpu;
+
+	inject_mce(m);
+	if (m->status & MCI_STATUS_UC) {
+		struct pt_regs regs;
+		memset(&regs, 0, sizeof(struct pt_regs));
+		regs.ip = m->ip;
+		regs.cs = m->cs;
+		printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
+		do_machine_check(&regs, 0);
+		printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+	} else {
+		mce_banks_t b;
+		memset(&b, 0xff, sizeof(mce_banks_t));
+		printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
+		machine_check_poll(0, &b);
+		mce_notify_irq();
+		printk(KERN_INFO "Finished machine check poll on CPU %d\n",
+		       cpu);
+	}
+	kfree(dm);
+}
+
+/* Error injection interface */
+static ssize_t mce_write(struct file *filp, const char __user *ubuf,
+			 size_t usize, loff_t *off)
+{
+	struct delayed_mce *dm;
+	struct mce m;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	/*
+	 * There are some cases where real MSR reads could slip
+	 * through.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
+		return -EIO;
+
+	if ((unsigned long)usize > sizeof(struct mce))
+		usize = sizeof(struct mce);
+	if (copy_from_user(&m, ubuf, usize))
+		return -EFAULT;
+
+	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
+		return -EINVAL;
+
+	dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
+	if (!dm)
+		return -ENOMEM;
+
+	/*
+	 * Need to give user space some time to set everything up,
+	 * so do it a jiffie or two later everywhere.
+	 * Should we use a hrtimer here for better synchronization?
+	 */
+	memcpy(&dm->m, &m, sizeof(struct mce));
+	setup_timer(&dm->timer, raise_mce, (unsigned long)dm);
+	dm->timer.expires = jiffies + 2;
+	add_timer_on(&dm->timer, m.extcpu);
+	return usize;
+}
+
+static int inject_init(void)
+{
+	printk(KERN_INFO "Machine check injector initialized\n");
+	mce_chrdev_ops.write = mce_write;
+	return 0;
+}
+
+module_init(inject_init);
+/*
+ * Cannot tolerate unloading currently because we cannot
+ * guarantee all openers of mce_chrdev will get a reference to us.
+ */
+MODULE_LICENSE("GPL");
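
Because inject_init() hooks mce_write() into mce_chrdev_ops, injecting a fake
event is just a write of a struct mce to the mce character device. A hedged
userspace sketch with arbitrary values (the supported way is the matching
mce-inject userspace tool, and CAP_SYS_ADMIN is required):

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>
    #include <asm/mce.h>

    int main(void)
    {
        struct mce m;
        int fd = open("/dev/mcelog", O_RDWR);

        if (fd < 0)
            return 1;
        memset(&m, 0, sizeof(m));
        m.status = MCI_STATUS_VAL | MCI_STATUS_EN; /* corrected error */
        m.bank = 1;      /* arbitrary bank number */
        m.extcpu = 0;    /* raise it on CPU 0 */
        /* without MCI_STATUS_UC set, raise_mce() takes the polling path */
        if (write(fd, &m, sizeof(m)) < 0)
            return 1;
        close(fd);
        return 0;
    }
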
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
new file mode 100644
index 000000000000..54dcb8ff12e5
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -0,0 +1,15 @@
+#include <asm/mce.h>
+
+enum severity_level {
+	MCE_NO_SEVERITY,
+	MCE_KEEP_SEVERITY,
+	MCE_SOME_SEVERITY,
+	MCE_AO_SEVERITY,
+	MCE_UC_SEVERITY,
+	MCE_AR_SEVERITY,
+	MCE_PANIC_SEVERITY,
+};
+
+int mce_severity(struct mce *a, int tolerant, char **msg);
+
+extern int mce_ser;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
new file mode 100644
index 000000000000..ff0807f97056
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -0,0 +1,218 @@
+/*
+ * MCE grading rules.
+ * Copyright 2008, 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * Author: Andi Kleen
+ */
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <asm/mce.h>
+
+#include "mce-internal.h"
+
+/*
+ * Grade an mce by severity. In general the most severe ones are processed
+ * first. Since there are quite a lot of combinations test the bits in a
+ * table-driven way. The rules are simply processed in order, first
+ * match wins.
+ *
+ * Note this is only used for machine check exceptions, the corrected
+ * errors use much simpler rules. The exceptions still check for the corrected
+ * errors, but only to leave them alone for the CMCI handler (except for
+ * panic situations)
+ */
+
+enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+
+static struct severity {
+	u64 mask;
+	u64 result;
+	unsigned char sev;
+	unsigned char mcgmask;
+	unsigned char mcgres;
+	unsigned char ser;
+	unsigned char context;
+	unsigned char covered;
+	char *msg;
+} severities[] = {
+#define KERNEL .context = IN_KERNEL
+#define USER .context = IN_USER
+#define SER .ser = SER_REQUIRED
+#define NOSER .ser = NO_SER
+#define SEV(s) .sev = MCE_ ## s ## _SEVERITY
+#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
+#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
+#define MCGMASK(x, res, s, m, r...) \
+	{ .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
+#define MASK(x, y, s, m, r...) \
+	{ .mask = x, .result = y, SEV(s), .msg = m, ## r }
+#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
+#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
+#define MCACOD 0xffff
+
+	BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
+	BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
+	BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
+	/* When MCIP is not set something is very confused */
+	MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
+	/* Neither return nor error IP -- no chance to recover -> PANIC */
+	MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
+		"Neither restart nor error IP"),
+	MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
+		KERNEL),
+	BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
+	MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
+	     "Spurious not enabled", SER),
+
+	/* ignore OVER for UCNA */
+	MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
+	     "Uncorrected no action required", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
+	     "Illegal combination (UCNA with AR=1)", SER),
+	MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
+
+	/* AR add known MCACODs here */
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
+	     "Action required with lost events", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
+	     "Action required; unknown MCACOD", SER),
+
+	/* known AO MCACODs: */
+	MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
+	     "Action optional: memory scrubbing error", SER),
+	MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
+	     "Action optional: last level cache writeback error", SER),
+
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
+	     "Action optional unknown MCACOD", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
+	     "Action optional with lost events", SER),
+	BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
+	BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
+	BITSET(0, SOME, "No match")	/* always matches. keep at end */
+};
+
+/*
+ * If the EIPV bit is set, it means the saved IP is the
+ * instruction which caused the MCE.
+ */
+static int error_context(struct mce *m)
+{
+	if (m->mcgstatus & MCG_STATUS_EIPV)
+		return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+	/* Unknown, assume kernel */
+	return IN_KERNEL;
+}
+
+int mce_severity(struct mce *a, int tolerant, char **msg)
+{
+	enum context ctx = error_context(a);
+	struct severity *s;
+
+	for (s = severities;; s++) {
+		if ((a->status & s->mask) != s->result)
+			continue;
+		if ((a->mcgstatus & s->mcgmask) != s->mcgres)
+			continue;
+		if (s->ser == SER_REQUIRED && !mce_ser)
+			continue;
+		if (s->ser == NO_SER && mce_ser)
+			continue;
+		if (s->context && ctx != s->context)
+			continue;
+		if (msg)
+			*msg = s->msg;
+		s->covered = 1;
+		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
+			if (panic_on_oops || tolerant < 1)
+				return MCE_PANIC_SEVERITY;
+		}
+		return s->sev;
+	}
+}
+
+static void *s_start(struct seq_file *f, loff_t *pos)
+{
+	if (*pos >= ARRAY_SIZE(severities))
+		return NULL;
+	return &severities[*pos];
+}
+
+static void *s_next(struct seq_file *f, void *data, loff_t *pos)
+{
+	if (++(*pos) >= ARRAY_SIZE(severities))
+		return NULL;
+	return &severities[*pos];
+}
+
+static void s_stop(struct seq_file *f, void *data)
+{
+}
+
+static int s_show(struct seq_file *f, void *data)
+{
+	struct severity *ser = data;
+	seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
+	return 0;
+}
+
+static const struct seq_operations severities_seq_ops = {
+	.start = s_start,
+	.next = s_next,
+	.stop = s_stop,
+	.show = s_show,
+};
+
+static int severities_coverage_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &severities_seq_ops);
+}
+
+static ssize_t severities_coverage_write(struct file *file,
+					 const char __user *ubuf,
+					 size_t count, loff_t *ppos)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(severities); i++)
+		severities[i].covered = 0;
+	return count;
+}
+
+static const struct file_operations severities_coverage_fops = {
+	.open		= severities_coverage_open,
+	.release	= seq_release,
+	.read		= seq_read,
+	.write		= severities_coverage_write,
+};
+
+static int __init severities_debugfs_init(void)
+{
+	struct dentry *dmce = NULL, *fseverities_coverage = NULL;
+
+	dmce = debugfs_create_dir("mce", NULL);
+	if (dmce == NULL)
+		goto err_out;
+	fseverities_coverage = debugfs_create_file("severities-coverage",
+						   0444, dmce, NULL,
+						   &severities_coverage_fops);
+	if (fseverities_coverage == NULL)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	if (fseverities_coverage)
+		debugfs_remove(fseverities_coverage);
+	if (dmce)
+		debugfs_remove(dmce);
+	return -ENOMEM;
+}
+late_initcall(severities_debugfs_init);
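
The covered flag that mce_severity() sets above is what this debugfs file
reports, giving per-rule coverage during testing; a write resets the counters.
Assuming debugfs is mounted in the usual place:

    cat /sys/kernel/debug/mce/severities-coverage
    echo > /sys/kernel/debug/mce/severities-coverage   # clear coverage
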
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c new file mode 100644 index 000000000000..fabba15e4558 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
| @@ -0,0 +1,1964 @@ | |||
| 1 | /* | ||
| 2 | * Machine check handler. | ||
| 3 | * | ||
| 4 | * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. | ||
| 5 | * Rest from unknown author(s). | ||
| 6 | * 2004 Andi Kleen. Rewrote most of it. | ||
| 7 | * Copyright 2008 Intel Corporation | ||
| 8 | * Author: Andi Kleen | ||
| 9 | */ | ||
| 10 | #include <linux/thread_info.h> | ||
| 11 | #include <linux/capability.h> | ||
| 12 | #include <linux/miscdevice.h> | ||
| 13 | #include <linux/interrupt.h> | ||
| 14 | #include <linux/ratelimit.h> | ||
| 15 | #include <linux/kallsyms.h> | ||
| 16 | #include <linux/rcupdate.h> | ||
| 17 | #include <linux/kobject.h> | ||
| 18 | #include <linux/uaccess.h> | ||
| 19 | #include <linux/kdebug.h> | ||
| 20 | #include <linux/kernel.h> | ||
| 21 | #include <linux/percpu.h> | ||
| 22 | #include <linux/string.h> | ||
| 23 | #include <linux/sysdev.h> | ||
| 24 | #include <linux/delay.h> | ||
| 25 | #include <linux/ctype.h> | ||
| 26 | #include <linux/sched.h> | ||
| 27 | #include <linux/sysfs.h> | ||
| 28 | #include <linux/types.h> | ||
| 29 | #include <linux/init.h> | ||
| 30 | #include <linux/kmod.h> | ||
| 31 | #include <linux/poll.h> | ||
| 32 | #include <linux/nmi.h> | ||
| 33 | #include <linux/cpu.h> | ||
| 34 | #include <linux/smp.h> | ||
| 35 | #include <linux/fs.h> | ||
| 36 | #include <linux/mm.h> | ||
| 37 | |||
| 38 | #include <asm/processor.h> | ||
| 39 | #include <asm/hw_irq.h> | ||
| 40 | #include <asm/apic.h> | ||
| 41 | #include <asm/idle.h> | ||
| 42 | #include <asm/ipi.h> | ||
| 43 | #include <asm/mce.h> | ||
| 44 | #include <asm/msr.h> | ||
| 45 | |||
| 46 | #include "mce-internal.h" | ||
| 47 | #include "mce.h" | ||
| 48 | |||
| 49 | /* Handle unconfigured int18 (should never happen) */ | ||
| 50 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | ||
| 51 | { | ||
| 52 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", | ||
| 53 | smp_processor_id()); | ||
| 54 | } | ||
| 55 | |||
| 56 | /* Call the installed machine check handler for this CPU setup. */ | ||
| 57 | void (*machine_check_vector)(struct pt_regs *, long error_code) = | ||
| 58 | unexpected_machine_check; | ||
| 59 | |||
| 60 | int mce_disabled; | ||
| 61 | |||
| 62 | #ifdef CONFIG_X86_NEW_MCE | ||
| 63 | |||
| 64 | #define MISC_MCELOG_MINOR 227 | ||
| 65 | |||
| 66 | #define SPINUNIT 100 /* 100ns */ | ||
| 67 | |||
| 68 | atomic_t mce_entry; | ||
| 69 | |||
| 70 | DEFINE_PER_CPU(unsigned, mce_exception_count); | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Tolerant levels: | ||
| 74 | * 0: always panic on uncorrected errors, log corrected errors | ||
| 75 | * 1: panic or SIGBUS on uncorrected errors, log corrected errors | ||
| 76 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors | ||
| 77 | * 3: never panic or SIGBUS, log all errors (for testing only) | ||
| 78 | */ | ||
| 79 | static int tolerant = 1; | ||
| 80 | static int banks; | ||
| 81 | static u64 *bank; | ||
| 82 | static unsigned long notify_user; | ||
| 83 | static int rip_msr; | ||
| 84 | static int mce_bootlog = -1; | ||
| 85 | static int monarch_timeout = -1; | ||
| 86 | static int mce_panic_timeout; | ||
| 87 | static int mce_dont_log_ce; | ||
| 88 | int mce_cmci_disabled; | ||
| 89 | int mce_ignore_ce; | ||
| 90 | int mce_ser; | ||
| 91 | |||
| 92 | static char trigger[128]; | ||
| 93 | static char *trigger_argv[2] = { trigger, NULL }; | ||
| 94 | |||
| 95 | static unsigned long dont_init_banks; | ||
| 96 | |||
| 97 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | ||
| 98 | static DEFINE_PER_CPU(struct mce, mces_seen); | ||
| 99 | static int cpu_missing; | ||
| 100 | |||
| 101 | |||
| 102 | /* MCA banks polled by the period polling timer for corrected events */ | ||
| 103 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | ||
| 104 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | ||
| 105 | }; | ||
| 106 | |||
| 107 | static inline int skip_bank_init(int i) | ||
| 108 | { | ||
| 109 | return i < BITS_PER_LONG && test_bit(i, &dont_init_banks); | ||
| 110 | } | ||
| 111 | |||
| 112 | static DEFINE_PER_CPU(struct work_struct, mce_work); | ||
| 113 | |||
| 114 | /* Do initial initialization of a struct mce */ | ||
| 115 | void mce_setup(struct mce *m) | ||
| 116 | { | ||
| 117 | memset(m, 0, sizeof(struct mce)); | ||
| 118 | m->cpu = m->extcpu = smp_processor_id(); | ||
| 119 | rdtscll(m->tsc); | ||
| 120 | /* We hope get_seconds stays lockless */ | ||
| 121 | m->time = get_seconds(); | ||
| 122 | m->cpuvendor = boot_cpu_data.x86_vendor; | ||
| 123 | m->cpuid = cpuid_eax(1); | ||
| 124 | #ifdef CONFIG_SMP | ||
| 125 | m->socketid = cpu_data(m->extcpu).phys_proc_id; | ||
| 126 | #endif | ||
| 127 | m->apicid = cpu_data(m->extcpu).initial_apicid; | ||
| 128 | rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); | ||
| 129 | } | ||
| 130 | |||
| 131 | DEFINE_PER_CPU(struct mce, injectm); | ||
| 132 | EXPORT_PER_CPU_SYMBOL_GPL(injectm); | ||
| 133 | |||
| 134 | /* | ||
| 135 | * Lockless MCE logging infrastructure. | ||
| 136 | * This avoids deadlocks on printk locks without having to break locks. It | ||
| 137 | * also separates MCEs from kernel messages to avoid bogus bug reports. | ||
| 138 | */ | ||
| 139 | |||
| 140 | static struct mce_log mcelog = { | ||
| 141 | .signature = MCE_LOG_SIGNATURE, | ||
| 142 | .len = MCE_LOG_LEN, | ||
| 143 | .recordlen = sizeof(struct mce), | ||
| 144 | }; | ||
| 145 | |||
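| | /* | ||
| | * Writer protocol: reserve a free slot by advancing mcelog.next with | ||
| | * cmpxchg, fill the entry, then set ->finished last so that readers | ||
| | * (mce_read below) only see complete records. wmb() orders the stores. | ||
| | */ | ||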
| 146 | void mce_log(struct mce *mce) | ||
| 147 | { | ||
| 148 | unsigned next, entry; | ||
| 149 | |||
| 150 | mce->finished = 0; | ||
| 151 | wmb(); | ||
| 152 | for (;;) { | ||
| 153 | entry = rcu_dereference(mcelog.next); | ||
| 154 | for (;;) { | ||
| 155 | /* | ||
| 156 | * When the buffer fills up discard new entries. | ||
| 157 | * Assume that the earlier errors are the more | ||
| 158 | * interesting ones: | ||
| 159 | */ | ||
| 160 | if (entry >= MCE_LOG_LEN) { | ||
| 161 | set_bit(MCE_OVERFLOW, | ||
| 162 | (unsigned long *)&mcelog.flags); | ||
| 163 | return; | ||
| 164 | } | ||
| 165 | /* Old left over entry. Skip: */ | ||
| 166 | if (mcelog.entry[entry].finished) { | ||
| 167 | entry++; | ||
| 168 | continue; | ||
| 169 | } | ||
| 170 | break; | ||
| 171 | } | ||
| 172 | smp_rmb(); | ||
| 173 | next = entry + 1; | ||
| 174 | if (cmpxchg(&mcelog.next, entry, next) == entry) | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); | ||
| 178 | wmb(); | ||
| 179 | mcelog.entry[entry].finished = 1; | ||
| 180 | wmb(); | ||
| 181 | |||
| 182 | mce->finished = 1; | ||
| 183 | set_bit(0, ¬ify_user); | ||
| 184 | } | ||
| 185 | |||
| 186 | static void print_mce(struct mce *m) | ||
| 187 | { | ||
| 188 | printk(KERN_EMERG | ||
| 189 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | ||
| 190 | m->extcpu, m->mcgstatus, m->bank, m->status); | ||
| 191 | if (m->ip) { | ||
| 192 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", | ||
| 193 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | ||
| 194 | m->cs, m->ip); | ||
| 195 | if (m->cs == __KERNEL_CS) | ||
| 196 | print_symbol("{%s}", m->ip); | ||
| 197 | printk("\n"); | ||
| 198 | } | ||
| 199 | printk(KERN_EMERG "TSC %llx ", m->tsc); | ||
| 200 | if (m->addr) | ||
| 201 | printk("ADDR %llx ", m->addr); | ||
| 202 | if (m->misc) | ||
| 203 | printk("MISC %llx ", m->misc); | ||
| 204 | printk("\n"); | ||
| 205 | printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | ||
| 206 | m->cpuvendor, m->cpuid, m->time, m->socketid, | ||
| 207 | m->apicid); | ||
| 208 | } | ||
| 209 | |||
| 210 | static void print_mce_head(void) | ||
| 211 | { | ||
| 212 | printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n"); | ||
| 213 | } | ||
| 214 | |||
| 215 | static void print_mce_tail(void) | ||
| 216 | { | ||
| 217 | printk(KERN_EMERG "This is not a software problem!\n" | ||
| 218 | KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n"); | ||
| 219 | } | ||
| 220 | |||
| 221 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | ||
| 222 | |||
| 223 | static atomic_t mce_paniced; | ||
| 224 | |||
| 225 | /* Panic in progress. Enable interrupts and wait for final IPI */ | ||
| 226 | static void wait_for_panic(void) | ||
| 227 | { | ||
| 228 | long timeout = PANIC_TIMEOUT*USEC_PER_SEC; | ||
| 229 | preempt_disable(); | ||
| 230 | local_irq_enable(); | ||
| 231 | while (timeout-- > 0) | ||
| 232 | udelay(1); | ||
| 233 | if (panic_timeout == 0) | ||
| 234 | panic_timeout = mce_panic_timeout; | ||
| 235 | panic("Panicking: machine check CPU died"); | ||
| 236 | } | ||
| 237 | |||
| 238 | static void mce_panic(char *msg, struct mce *final, char *exp) | ||
| 239 | { | ||
| 240 | int i; | ||
| 241 | |||
| 242 | /* | ||
| 243 | * Make sure only one CPU runs in machine check panic | ||
| 244 | */ | ||
| 245 | if (atomic_add_return(1, &mce_paniced) > 1) | ||
| 246 | wait_for_panic(); | ||
| 247 | barrier(); | ||
| 248 | |||
| 249 | bust_spinlocks(1); | ||
| 250 | console_verbose(); | ||
| 251 | print_mce_head(); | ||
| 252 | /* First print corrected ones that are still unlogged */ | ||
| 253 | for (i = 0; i < MCE_LOG_LEN; i++) { | ||
| 254 | struct mce *m = &mcelog.entry[i]; | ||
| 255 | if (!(m->status & MCI_STATUS_VAL)) | ||
| 256 | continue; | ||
| 257 | if (!(m->status & MCI_STATUS_UC)) | ||
| 258 | print_mce(m); | ||
| 259 | } | ||
| 260 | /* Now print uncorrected but with the final one last */ | ||
| 261 | for (i = 0; i < MCE_LOG_LEN; i++) { | ||
| 262 | struct mce *m = &mcelog.entry[i]; | ||
| 263 | if (!(m->status & MCI_STATUS_VAL)) | ||
| 264 | continue; | ||
| 265 | if (!(m->status & MCI_STATUS_UC)) | ||
| 266 | continue; | ||
| 267 | if (!final || memcmp(m, final, sizeof(struct mce))) | ||
| 268 | print_mce(m); | ||
| 269 | } | ||
| 270 | if (final) | ||
| 271 | print_mce(final); | ||
| 272 | if (cpu_missing) | ||
| 273 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | ||
| 274 | print_mce_tail(); | ||
| 275 | if (exp) | ||
| 276 | printk(KERN_EMERG "Machine check: %s\n", exp); | ||
| 277 | if (panic_timeout == 0) | ||
| 278 | panic_timeout = mce_panic_timeout; | ||
| 279 | panic(msg); | ||
| 280 | } | ||
| 281 | |||
| 282 | /* Support code for software error injection */ | ||
| 283 | |||
| 284 | static int msr_to_offset(u32 msr) | ||
| 285 | { | ||
| 286 | unsigned bank = __get_cpu_var(injectm.bank); | ||
| 287 | if (msr == rip_msr) | ||
| 288 | return offsetof(struct mce, ip); | ||
| 289 | if (msr == MSR_IA32_MC0_STATUS + bank*4) | ||
| 290 | return offsetof(struct mce, status); | ||
| 291 | if (msr == MSR_IA32_MC0_ADDR + bank*4) | ||
| 292 | return offsetof(struct mce, addr); | ||
| 293 | if (msr == MSR_IA32_MC0_MISC + bank*4) | ||
| 294 | return offsetof(struct mce, misc); | ||
| 295 | if (msr == MSR_IA32_MCG_STATUS) | ||
| 296 | return offsetof(struct mce, mcgstatus); | ||
| 297 | return -1; | ||
| 298 | } | ||
| 299 | |||
| 300 | /* MSR access wrappers used for error injection */ | ||
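| | /* | ||
| | * When the per-CPU injectm.finished flag is set, the wrappers below | ||
| | * redirect the access into the injected struct mce (see msr_to_offset) | ||
| | * instead of touching the real MSRs. | ||
| | */ | ||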
| 301 | static u64 mce_rdmsrl(u32 msr) | ||
| 302 | { | ||
| 303 | u64 v; | ||
| 304 | if (__get_cpu_var(injectm).finished) { | ||
| 305 | int offset = msr_to_offset(msr); | ||
| 306 | if (offset < 0) | ||
| 307 | return 0; | ||
| 308 | return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); | ||
| 309 | } | ||
| 310 | rdmsrl(msr, v); | ||
| 311 | return v; | ||
| 312 | } | ||
| 313 | |||
| 314 | static void mce_wrmsrl(u32 msr, u64 v) | ||
| 315 | { | ||
| 316 | if (__get_cpu_var(injectm).finished) { | ||
| 317 | int offset = msr_to_offset(msr); | ||
| 318 | if (offset >= 0) | ||
| 319 | *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; | ||
| 320 | return; | ||
| 321 | } | ||
| 322 | wrmsrl(msr, v); | ||
| 323 | } | ||
| 324 | |||
| 325 | /* | ||
| 326 | * Simple lockless ring to communicate PFNs from the exception handler to the | ||
| 327 | * process context work function. This is vastly simplified because there's | ||
| 328 | * only a single reader and a single writer. | ||
| 329 | */ | ||
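| | /* | ||
| | * One slot is kept unused: the ring is full when (end + 1) % SIZE == | ||
| | * start, so start == end unambiguously means empty. | ||
| | */ | ||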
| 330 | #define MCE_RING_SIZE 16 /* we use one entry less */ | ||
| 331 | |||
| 332 | struct mce_ring { | ||
| 333 | unsigned short start; | ||
| 334 | unsigned short end; | ||
| 335 | unsigned long ring[MCE_RING_SIZE]; | ||
| 336 | }; | ||
| 337 | static DEFINE_PER_CPU(struct mce_ring, mce_ring); | ||
| 338 | |||
| 339 | /* Runs with CPU affinity in workqueue */ | ||
| 340 | static int mce_ring_empty(void) | ||
| 341 | { | ||
| 342 | struct mce_ring *r = &__get_cpu_var(mce_ring); | ||
| 343 | |||
| 344 | return r->start == r->end; | ||
| 345 | } | ||
| 346 | |||
| 347 | static int mce_ring_get(unsigned long *pfn) | ||
| 348 | { | ||
| 349 | struct mce_ring *r; | ||
| 350 | int ret = 0; | ||
| 351 | |||
| 352 | *pfn = 0; | ||
| 353 | get_cpu(); | ||
| 354 | r = &__get_cpu_var(mce_ring); | ||
| 355 | if (r->start == r->end) | ||
| 356 | goto out; | ||
| 357 | *pfn = r->ring[r->start]; | ||
| 358 | r->start = (r->start + 1) % MCE_RING_SIZE; | ||
| 359 | ret = 1; | ||
| 360 | out: | ||
| 361 | put_cpu(); | ||
| 362 | return ret; | ||
| 363 | } | ||
| 364 | |||
| 365 | /* Always runs in MCE context with preempt off */ | ||
| 366 | static int mce_ring_add(unsigned long pfn) | ||
| 367 | { | ||
| 368 | struct mce_ring *r = &__get_cpu_var(mce_ring); | ||
| 369 | unsigned next; | ||
| 370 | |||
| 371 | next = (r->end + 1) % MCE_RING_SIZE; | ||
| 372 | if (next == r->start) | ||
| 373 | return -1; | ||
| 374 | r->ring[r->end] = pfn; | ||
| 375 | wmb(); | ||
| 376 | r->end = next; | ||
| 377 | return 0; | ||
| 378 | } | ||
| 379 | |||
| 380 | int mce_available(struct cpuinfo_x86 *c) | ||
| 381 | { | ||
| 382 | if (mce_disabled) | ||
| 383 | return 0; | ||
| 384 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); | ||
| 385 | } | ||
| 386 | |||
| 387 | static void mce_schedule_work(void) | ||
| 388 | { | ||
| 389 | if (!mce_ring_empty()) { | ||
| 390 | struct work_struct *work = &__get_cpu_var(mce_work); | ||
| 391 | if (!work_pending(work)) | ||
| 392 | schedule_work(work); | ||
| 393 | } | ||
| 394 | } | ||
| 395 | |||
| 396 | /* | ||
| 397 | * Get the address of the instruction at the time of the machine check | ||
| 398 | * error. | ||
| 399 | */ | ||
| 400 | static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | ||
| 401 | { | ||
| 402 | |||
| 403 | if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) { | ||
| 404 | m->ip = regs->ip; | ||
| 405 | m->cs = regs->cs; | ||
| 406 | } else { | ||
| 407 | m->ip = 0; | ||
| 408 | m->cs = 0; | ||
| 409 | } | ||
| 410 | if (rip_msr) | ||
| 411 | m->ip = mce_rdmsrl(rip_msr); | ||
| 412 | } | ||
| 413 | |||
| 414 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 415 | /* | ||
| 416 | * Called after interrupts have been re-enabled when an MCE | ||
| 417 | * happened during an interrupts-off region | ||
| 418 | * in the kernel. | ||
| 419 | */ | ||
| 420 | asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs) | ||
| 421 | { | ||
| 422 | ack_APIC_irq(); | ||
| 423 | exit_idle(); | ||
| 424 | irq_enter(); | ||
| 425 | mce_notify_irq(); | ||
| 426 | mce_schedule_work(); | ||
| 427 | irq_exit(); | ||
| 428 | } | ||
| 429 | #endif | ||
| 430 | |||
| 431 | static void mce_report_event(struct pt_regs *regs) | ||
| 432 | { | ||
| 433 | if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { | ||
| 434 | mce_notify_irq(); | ||
| 435 | /* | ||
| 436 | * Triggering the work queue here is just an insurance | ||
| 437 | * policy in case the syscall exit notify handler | ||
| 438 | * doesn't run soon enough or ends up running on the | ||
| 439 | * wrong CPU (can happen when audit sleeps) | ||
| 440 | */ | ||
| 441 | mce_schedule_work(); | ||
| 442 | return; | ||
| 443 | } | ||
| 444 | |||
| 445 | #ifdef CONFIG_X86_LOCAL_APIC | ||
| 446 | /* | ||
| 447 | * Without APIC do not notify. The event will be picked | ||
| 448 | * up eventually. | ||
| 449 | */ | ||
| 450 | if (!cpu_has_apic) | ||
| 451 | return; | ||
| 452 | |||
| 453 | /* | ||
| 454 | * When interrupts are disabled we cannot use | ||
| 455 | * kernel services safely. Trigger a self interrupt | ||
| 456 | * through the APIC to instead do the notification | ||
| 457 | * after interrupts are re-enabled. | ||
| 458 | */ | ||
| 459 | apic->send_IPI_self(MCE_SELF_VECTOR); | ||
| 460 | |||
| 461 | /* | ||
| 462 | * Wait for idle afterwards again so that we don't leave the | ||
| 463 | * APIC in a non-idle state because the normal APIC writes | ||
| 464 | * cannot exclude us. | ||
| 465 | */ | ||
| 466 | apic_wait_icr_idle(); | ||
| 467 | #endif | ||
| 468 | } | ||
| 469 | |||
| 470 | DEFINE_PER_CPU(unsigned, mce_poll_count); | ||
| 471 | |||
| 472 | /* | ||
| 473 | * Poll for corrected events or events that happened before reset. | ||
| 474 | * Those are just logged through /dev/mcelog. | ||
| 475 | * | ||
| 476 | * This is executed in standard interrupt context. | ||
| 477 | * | ||
| 478 | * Note: the spec recommends panicking for fatal unsignalled | ||
| 479 | * errors here. However this would be quite problematic -- | ||
| 480 | * we would need to reimplement the Monarch handling and | ||
| 481 | * it would mess up the exclusion between the exception handler | ||
| 482 | * and the poll handler -- so we skip this for now. | ||
| 483 | * These cases should not happen anyway, or only when the CPU | ||
| 484 | * is already totally confused. In that case it is likely it will | ||
| 485 | * not fully execute the machine check handler either. | ||
| 486 | */ | ||
| 487 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | ||
| 488 | { | ||
| 489 | struct mce m; | ||
| 490 | int i; | ||
| 491 | |||
| 492 | __get_cpu_var(mce_poll_count)++; | ||
| 493 | |||
| 494 | mce_setup(&m); | ||
| 495 | |||
| 496 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | ||
| 497 | for (i = 0; i < banks; i++) { | ||
| 498 | if (!bank[i] || !test_bit(i, *b)) | ||
| 499 | continue; | ||
| 500 | |||
| 501 | m.misc = 0; | ||
| 502 | m.addr = 0; | ||
| 503 | m.bank = i; | ||
| 504 | m.tsc = 0; | ||
| 505 | |||
| 506 | barrier(); | ||
| 507 | m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | ||
| 508 | if (!(m.status & MCI_STATUS_VAL)) | ||
| 509 | continue; | ||
| 510 | |||
| 511 | /* | ||
| 512 | * Uncorrected or signalled events are handled by the exception | ||
| 513 | * handler when it is enabled, so don't process those here. | ||
| 514 | * | ||
| 515 | * TBD do the same check for MCI_STATUS_EN here? | ||
| 516 | */ | ||
| 517 | if (!(flags & MCP_UC) && | ||
| 518 | (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) | ||
| 519 | continue; | ||
| 520 | |||
| 521 | if (m.status & MCI_STATUS_MISCV) | ||
| 522 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | ||
| 523 | if (m.status & MCI_STATUS_ADDRV) | ||
| 524 | m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); | ||
| 525 | |||
| 526 | if (!(flags & MCP_TIMESTAMP)) | ||
| 527 | m.tsc = 0; | ||
| 528 | /* | ||
| 529 | * Don't get the IP here because it's unlikely to | ||
| 530 | * have anything to do with the actual error location. | ||
| 531 | */ | ||
| 532 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { | ||
| 533 | mce_log(&m); | ||
| 534 | add_taint(TAINT_MACHINE_CHECK); | ||
| 535 | } | ||
| 536 | |||
| 537 | /* | ||
| 538 | * Clear state for this bank. | ||
| 539 | */ | ||
| 540 | mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
| 541 | } | ||
| 542 | |||
| 543 | /* | ||
| 544 | * Don't clear MCG_STATUS here because it's only defined for | ||
| 545 | * exceptions. | ||
| 546 | */ | ||
| 547 | |||
| 548 | sync_core(); | ||
| 549 | } | ||
| 550 | EXPORT_SYMBOL_GPL(machine_check_poll); | ||
| 551 | |||
| 552 | /* | ||
| 553 | * Do a quick check if any of the events requires a panic. | ||
| 554 | * This decides if we keep the events around or clear them. | ||
| 555 | */ | ||
| 556 | static int mce_no_way_out(struct mce *m, char **msg) | ||
| 557 | { | ||
| 558 | int i; | ||
| 559 | |||
| 560 | for (i = 0; i < banks; i++) { | ||
| 561 | m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | ||
| 562 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) | ||
| 563 | return 1; | ||
| 564 | } | ||
| 565 | return 0; | ||
| 566 | } | ||
| 567 | |||
| 568 | /* | ||
| 569 | * Variable to establish order between CPUs while scanning. | ||
| 570 | * Each CPU spins initially until mce_executing equals its number. | ||
| 571 | */ | ||
| 572 | static atomic_t mce_executing; | ||
| 573 | |||
| 574 | /* | ||
| 575 | * Defines order of CPUs on entry. First CPU becomes Monarch. | ||
| 576 | */ | ||
| 577 | static atomic_t mce_callin; | ||
| 578 | |||
| 579 | /* | ||
| 580 | * Check if a timeout waiting for other CPUs happened. | ||
| 581 | */ | ||
| 582 | static int mce_timed_out(u64 *t) | ||
| 583 | { | ||
| 584 | /* | ||
| 585 | * The others already did panic for some reason. | ||
| 586 | * Bail out like in a timeout. | ||
| 587 | * rmb() to tell the compiler that mce_paniced | ||
| 588 | * might have been modified by another CPU. | ||
| 589 | */ | ||
| 590 | rmb(); | ||
| 591 | if (atomic_read(&mce_paniced)) | ||
| 592 | wait_for_panic(); | ||
| 593 | if (!monarch_timeout) | ||
| 594 | goto out; | ||
| 595 | if ((s64)*t < SPINUNIT) { | ||
| 596 | /* CHECKME: Make panic default for 1 too? */ | ||
| 597 | if (tolerant < 1) | ||
| 598 | mce_panic("Timeout synchronizing machine check over CPUs", | ||
| 599 | NULL, NULL); | ||
| 600 | cpu_missing = 1; | ||
| 601 | return 1; | ||
| 602 | } | ||
| 603 | *t -= SPINUNIT; | ||
| 604 | out: | ||
| 605 | touch_nmi_watchdog(); | ||
| 606 | return 0; | ||
| 607 | } | ||
| 608 | |||
| 609 | /* | ||
| 610 | * The Monarch's reign. The Monarch is the CPU who entered | ||
| 611 | * the machine check handler first. It waits for the others to | ||
| 612 | * raise the exception too and then grades them. If any | ||
| 613 | * error is fatal it panics; only then does it let the others continue. | ||
| 614 | * | ||
| 615 | * The other CPUs entering the MCE handler will be controlled by the | ||
| 616 | * Monarch. They are called Subjects. | ||
| 617 | * | ||
| 618 | * This way we prevent any potential data corruption in an unrecoverable case | ||
| 619 | * and also make sure that all CPUs' errors are always examined. | ||
| 620 | * | ||
| 621 | * This also detects the case of a machine check event coming from outer | ||
| 622 | * space (not detected by any CPU). In this case some external agent wants | ||
| 623 | * us to shut down, so panic too. | ||
| 624 | * | ||
| 625 | * The other CPUs might still decide to panic if the handler happens | ||
| 626 | * in an unrecoverable place, but in this case the system is in a semi-stable | ||
| 627 | * state and won't corrupt anything by itself. It's ok to let the others | ||
| 628 | * continue for a bit first. | ||
| 629 | * | ||
| 630 | * All the spin loops have timeouts; when a timeout happens a CPU | ||
| 631 | * typically elects itself to be Monarch. | ||
| 632 | */ | ||
| 633 | static void mce_reign(void) | ||
| 634 | { | ||
| 635 | int cpu; | ||
| 636 | struct mce *m = NULL; | ||
| 637 | int global_worst = 0; | ||
| 638 | char *msg = NULL; | ||
| 639 | char *nmsg = NULL; | ||
| 640 | |||
| 641 | /* | ||
| 642 | * This CPU is the Monarch and the other CPUs have run | ||
| 643 | * through their handlers. | ||
| 644 | * Grade the severity of the errors of all the CPUs. | ||
| 645 | */ | ||
| 646 | for_each_possible_cpu(cpu) { | ||
| 647 | int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant, | ||
| 648 | &nmsg); | ||
| 649 | if (severity > global_worst) { | ||
| 650 | msg = nmsg; | ||
| 651 | global_worst = severity; | ||
| 652 | m = &per_cpu(mces_seen, cpu); | ||
| 653 | } | ||
| 654 | } | ||
| 655 | |||
| 656 | /* | ||
| 657 | * Cannot recover? Panic here then. | ||
| 658 | * This dumps all the mces in the log buffer and stops the | ||
| 659 | * other CPUs. | ||
| 660 | */ | ||
| 661 | if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) | ||
| 662 | mce_panic("Fatal Machine check", m, msg); | ||
| 663 | |||
| 664 | /* | ||
| 665 | * For UC somewhere we let the CPU that detected it handle it. | ||
| 666 | * We must also let the others continue, otherwise the handling | ||
| 667 | * CPU could deadlock on a lock. | ||
| 668 | */ | ||
| 669 | |||
| 670 | /* | ||
| 671 | * No machine check event found. Must be some external | ||
| 672 | * source or one CPU is hung. Panic. | ||
| 673 | */ | ||
| 674 | if (!m && tolerant < 3) | ||
| 675 | mce_panic("Machine check from unknown source", NULL, NULL); | ||
| 676 | |||
| 677 | /* | ||
| 678 | * Now clear all the mces_seen so that they don't reappear on | ||
| 679 | * the next mce. | ||
| 680 | */ | ||
| 681 | for_each_possible_cpu(cpu) | ||
| 682 | memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce)); | ||
| 683 | } | ||
| 684 | |||
| 685 | static atomic_t global_nwo; | ||
| 686 | |||
| 687 | /* | ||
| 688 | * Start of Monarch synchronization. This waits until all CPUs have | ||
| 689 | * entered the exception handler and then determines if any of them | ||
| 690 | * saw a fatal event that requires a panic. Then the CPUs run their | ||
| 691 | * scanning loops one by one in the entry order. | ||
| 692 | * TBD double check parallel CPU hotunplug | ||
| 693 | */ | ||
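| | /* | ||
| | * On entry *order is this CPU's position in the callin sequence | ||
| | * (1 == Monarch); it is set to -1 on timeout so that mce_end() knows | ||
| | * the synchronization failed and falls back to local state. | ||
| | */ | ||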
| 694 | static int mce_start(int no_way_out, int *order) | ||
| 695 | { | ||
| 696 | int nwo; | ||
| 697 | int cpus = num_online_cpus(); | ||
| 698 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; | ||
| 699 | |||
| 700 | if (!timeout) { | ||
| 701 | *order = -1; | ||
| 702 | return no_way_out; | ||
| 703 | } | ||
| 704 | |||
| 705 | atomic_add(no_way_out, &global_nwo); | ||
| 706 | |||
| 707 | /* | ||
| 708 | * Wait for everyone. | ||
| 709 | */ | ||
| 710 | while (atomic_read(&mce_callin) != cpus) { | ||
| 711 | if (mce_timed_out(&timeout)) { | ||
| 712 | atomic_set(&global_nwo, 0); | ||
| 713 | *order = -1; | ||
| 714 | return no_way_out; | ||
| 715 | } | ||
| 716 | ndelay(SPINUNIT); | ||
| 717 | } | ||
| 718 | |||
| 719 | /* | ||
| 720 | * Cache the global no_way_out state. | ||
| 721 | */ | ||
| 722 | nwo = atomic_read(&global_nwo); | ||
| 723 | |||
| 724 | /* | ||
| 725 | * Monarch starts executing now, the others wait. | ||
| 726 | */ | ||
| 727 | if (*order == 1) { | ||
| 728 | atomic_set(&mce_executing, 1); | ||
| 729 | return nwo; | ||
| 730 | } | ||
| 731 | |||
| 732 | /* | ||
| 733 | * Now start the scanning loop one by one | ||
| 734 | * in the original callin order. | ||
| 735 | * This way, when there are shared banks, an event will | ||
| 736 | * only be seen by one CPU before being cleared, avoiding duplicates. | ||
| 737 | */ | ||
| 738 | while (atomic_read(&mce_executing) < *order) { | ||
| 739 | if (mce_timed_out(&timeout)) { | ||
| 740 | atomic_set(&global_nwo, 0); | ||
| 741 | *order = -1; | ||
| 742 | return no_way_out; | ||
| 743 | } | ||
| 744 | ndelay(SPINUNIT); | ||
| 745 | } | ||
| 746 | return nwo; | ||
| 747 | } | ||
| 748 | |||
| 749 | /* | ||
| 750 | * Synchronize between CPUs after main scanning loop. | ||
| 751 | * This invokes the bulk of the Monarch processing. | ||
| 752 | */ | ||
| 753 | static int mce_end(int order) | ||
| 754 | { | ||
| 755 | int ret = -1; | ||
| 756 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; | ||
| 757 | |||
| 758 | if (!timeout) | ||
| 759 | goto reset; | ||
| 760 | if (order < 0) | ||
| 761 | goto reset; | ||
| 762 | |||
| 763 | /* | ||
| 764 | * Allow others to run. | ||
| 765 | */ | ||
| 766 | atomic_inc(&mce_executing); | ||
| 767 | |||
| 768 | if (order == 1) { | ||
| 769 | /* CHECKME: Can this race with a parallel hotplug? */ | ||
| 770 | int cpus = num_online_cpus(); | ||
| 771 | |||
| 772 | /* | ||
| 773 | * Monarch: Wait for everyone to go through their scanning | ||
| 774 | * loops. | ||
| 775 | */ | ||
| 776 | while (atomic_read(&mce_executing) <= cpus) { | ||
| 777 | if (mce_timed_out(&timeout)) | ||
| 778 | goto reset; | ||
| 779 | ndelay(SPINUNIT); | ||
| 780 | } | ||
| 781 | |||
| 782 | mce_reign(); | ||
| 783 | barrier(); | ||
| 784 | ret = 0; | ||
| 785 | } else { | ||
| 786 | /* | ||
| 787 | * Subject: Wait for Monarch to finish. | ||
| 788 | */ | ||
| 789 | while (atomic_read(&mce_executing) != 0) { | ||
| 790 | if (mce_timed_out(&timeout)) | ||
| 791 | goto reset; | ||
| 792 | ndelay(SPINUNIT); | ||
| 793 | } | ||
| 794 | |||
| 795 | /* | ||
| 796 | * Don't reset anything. That's done by the Monarch. | ||
| 797 | */ | ||
| 798 | return 0; | ||
| 799 | } | ||
| 800 | |||
| 801 | /* | ||
| 802 | * Reset all global state. | ||
| 803 | */ | ||
| 804 | reset: | ||
| 805 | atomic_set(&global_nwo, 0); | ||
| 806 | atomic_set(&mce_callin, 0); | ||
| 807 | barrier(); | ||
| 808 | |||
| 809 | /* | ||
| 810 | * Let others run again. | ||
| 811 | */ | ||
| 812 | atomic_set(&mce_executing, 0); | ||
| 813 | return ret; | ||
| 814 | } | ||
| 815 | |||
| 816 | /* | ||
| 817 | * Check if the address reported by the CPU is in a format we can parse. | ||
| 818 | * It would be possible to add code for most other cases, but all would | ||
| 819 | * be somewhat complicated (e.g. segment offset would require an instruction | ||
| 820 | * parser). So only support physical addresses up to page granularity for now. | ||
| 821 | */ | ||
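| | /* | ||
| | * MCi_MISC layout relied on here: bits 5:0 give the least significant | ||
| | * valid bit of the address (the granularity), bits 8:6 the address | ||
| | * mode, which must be MCM_ADDR_PHYS for a physical address. | ||
| | */ | ||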
| 822 | static int mce_usable_address(struct mce *m) | ||
| 823 | { | ||
| 824 | if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) | ||
| 825 | return 0; | ||
| 826 | if ((m->misc & 0x3f) > PAGE_SHIFT) | ||
| 827 | return 0; | ||
| 828 | if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS) | ||
| 829 | return 0; | ||
| 830 | return 1; | ||
| 831 | } | ||
| 832 | |||
| 833 | static void mce_clear_state(unsigned long *toclear) | ||
| 834 | { | ||
| 835 | int i; | ||
| 836 | |||
| 837 | for (i = 0; i < banks; i++) { | ||
| 838 | if (test_bit(i, toclear)) | ||
| 839 | mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
| 840 | } | ||
| 841 | } | ||
| 842 | |||
| 843 | /* | ||
| 844 | * The actual machine check handler. This only handles real | ||
| 845 | * exceptions when something got corrupted coming in through int 18. | ||
| 846 | * | ||
| 847 | * This is executed in NMI context not subject to normal locking rules. This | ||
| 848 | * implies that most kernel services cannot be safely used. Don't even | ||
| 849 | * think about putting a printk in there! | ||
| 850 | * | ||
| 851 | * On Intel systems this is entered on all CPUs in parallel through | ||
| 852 | * MCE broadcast. However some CPUs might be broken beyond repair, | ||
| 853 | * so always be careful when synchronizing with others. | ||
| 854 | */ | ||
| 855 | void do_machine_check(struct pt_regs *regs, long error_code) | ||
| 856 | { | ||
| 857 | struct mce m, *final; | ||
| 858 | int i; | ||
| 859 | int worst = 0; | ||
| 860 | int severity; | ||
| 861 | /* | ||
| 862 | * Establish sequential order between the CPUs entering the machine | ||
| 863 | * check handler. | ||
| 864 | */ | ||
| 865 | int order; | ||
| 866 | |||
| 867 | /* | ||
| 868 | * If no_way_out gets set, there is no safe way to recover from this | ||
| 869 | * MCE. If tolerant is cranked up, we'll try anyway. | ||
| 870 | */ | ||
| 871 | int no_way_out = 0; | ||
| 872 | /* | ||
| 873 | * If kill_it gets set, there might be a way to recover from this | ||
| 874 | * error. | ||
| 875 | */ | ||
| 876 | int kill_it = 0; | ||
| 877 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | ||
| 878 | char *msg = "Unknown"; | ||
| 879 | |||
| 880 | atomic_inc(&mce_entry); | ||
| 881 | |||
| 882 | __get_cpu_var(mce_exception_count)++; | ||
| 883 | |||
| 884 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | ||
| 885 | 18, SIGKILL) == NOTIFY_STOP) | ||
| 886 | goto out; | ||
| 887 | if (!banks) | ||
| 888 | goto out; | ||
| 889 | |||
| 890 | order = atomic_add_return(1, &mce_callin); | ||
| 891 | mce_setup(&m); | ||
| 892 | |||
| 893 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | ||
| 894 | no_way_out = mce_no_way_out(&m, &msg); | ||
| 895 | |||
| 896 | final = &__get_cpu_var(mces_seen); | ||
| 897 | *final = m; | ||
| 898 | |||
| 899 | barrier(); | ||
| 900 | |||
| 901 | /* | ||
| 902 | * When there is no restart IP we must always kill or panic. | ||
| 903 | */ | ||
| 904 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | ||
| 905 | kill_it = 1; | ||
| 906 | |||
| 907 | /* | ||
| 908 | * Go through all the banks in exclusion of the other CPUs. | ||
| 909 | * This way we don't report duplicated events on shared banks | ||
| 910 | * because the first one to see it will clear it. | ||
| 911 | */ | ||
| 912 | no_way_out = mce_start(no_way_out, &order); | ||
| 913 | for (i = 0; i < banks; i++) { | ||
| 914 | __clear_bit(i, toclear); | ||
| 915 | if (!bank[i]) | ||
| 916 | continue; | ||
| 917 | |||
| 918 | m.misc = 0; | ||
| 919 | m.addr = 0; | ||
| 920 | m.bank = i; | ||
| 921 | |||
| 922 | m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | ||
| 923 | if ((m.status & MCI_STATUS_VAL) == 0) | ||
| 924 | continue; | ||
| 925 | |||
| 926 | /* | ||
| 927 | * Errors that are not uncorrected (or, with SER, not signalled) | ||
| 928 | * are handled by machine_check_poll(). Leave them alone, unless we panic. | ||
| 929 | */ | ||
| 930 | if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && | ||
| 931 | !no_way_out) | ||
| 932 | continue; | ||
| 933 | |||
| 934 | /* | ||
| 935 | * Set taint even when machine check was not enabled. | ||
| 936 | */ | ||
| 937 | add_taint(TAINT_MACHINE_CHECK); | ||
| 938 | |||
| 939 | severity = mce_severity(&m, tolerant, NULL); | ||
| 940 | |||
| 941 | /* | ||
| 942 | * When the machine check event is owned by the corrected-error | ||
| 943 | * handler, don't touch it unless we're panicking. | ||
| 944 | */ | ||
| 945 | if (severity == MCE_KEEP_SEVERITY && !no_way_out) | ||
| 946 | continue; | ||
| 947 | __set_bit(i, toclear); | ||
| 948 | if (severity == MCE_NO_SEVERITY) { | ||
| 949 | /* | ||
| 950 | * Machine check event was not enabled. Clear, but | ||
| 951 | * ignore. | ||
| 952 | */ | ||
| 953 | continue; | ||
| 954 | } | ||
| 955 | |||
| 956 | /* | ||
| 957 | * Kill on action required. | ||
| 958 | */ | ||
| 959 | if (severity == MCE_AR_SEVERITY) | ||
| 960 | kill_it = 1; | ||
| 961 | |||
| 962 | if (m.status & MCI_STATUS_MISCV) | ||
| 963 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | ||
| 964 | if (m.status & MCI_STATUS_ADDRV) | ||
| 965 | m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); | ||
| 966 | |||
| 967 | /* | ||
| 968 | * Action optional error. Queue address for later processing. | ||
| 969 | * When the ring overflows we just ignore the AO error. | ||
| 970 | * RED-PEN: add some logging mechanism when | ||
| 971 | * mce_usable_address() or mce_ring_add() fails. | ||
| 972 | * RED-PEN: don't ignore overflow for tolerant == 0 | ||
| 973 | */ | ||
| 974 | if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) | ||
| 975 | mce_ring_add(m.addr >> PAGE_SHIFT); | ||
| 976 | |||
| 977 | mce_get_rip(&m, regs); | ||
| 978 | mce_log(&m); | ||
| 979 | |||
| 980 | if (severity > worst) { | ||
| 981 | *final = m; | ||
| 982 | worst = severity; | ||
| 983 | } | ||
| 984 | } | ||
| 985 | |||
| 986 | if (!no_way_out) | ||
| 987 | mce_clear_state(toclear); | ||
| 988 | |||
| 989 | /* | ||
| 990 | * Do most of the synchronization with other CPUs. | ||
| 991 | * When there's any problem use only local no_way_out state. | ||
| 992 | */ | ||
| 993 | if (mce_end(order) < 0) | ||
| 994 | no_way_out = worst >= MCE_PANIC_SEVERITY; | ||
| 995 | |||
| 996 | /* | ||
| 997 | * If we have decided that we just CAN'T continue, and the user | ||
| 998 | * has not set tolerant to an insane level, give up and die. | ||
| 999 | * | ||
| 1000 | * This is mainly used in the case when the system doesn't | ||
| 1001 | * support MCE broadcasting or it has been disabled. | ||
| 1002 | */ | ||
| 1003 | if (no_way_out && tolerant < 3) | ||
| 1004 | mce_panic("Fatal machine check on current CPU", final, msg); | ||
| 1005 | |||
| 1006 | /* | ||
| 1007 | * If the error seems to be unrecoverable, something should be | ||
| 1008 | * done. Try to kill as little as possible. If we can kill just | ||
| 1009 | * one task, do that. If the user has set the tolerance very | ||
| 1010 | * high, don't try to do anything at all. | ||
| 1011 | */ | ||
| 1012 | |||
| 1013 | if (kill_it && tolerant < 3) | ||
| 1014 | force_sig(SIGBUS, current); | ||
| 1015 | |||
| 1016 | /* notify userspace ASAP */ | ||
| 1017 | set_thread_flag(TIF_MCE_NOTIFY); | ||
| 1018 | |||
| 1019 | if (worst > 0) | ||
| 1020 | mce_report_event(regs); | ||
| 1021 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); | ||
| 1022 | out: | ||
| 1023 | atomic_dec(&mce_entry); | ||
| 1024 | sync_core(); | ||
| 1025 | } | ||
| 1026 | EXPORT_SYMBOL_GPL(do_machine_check); | ||
| 1027 | |||
| 1028 | /* dummy to break dependency. actual code is in mm/memory-failure.c */ | ||
| 1029 | void __attribute__((weak)) memory_failure(unsigned long pfn, int vector) | ||
| 1030 | { | ||
| 1031 | printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn); | ||
| 1032 | } | ||
| 1033 | |||
| 1034 | /* | ||
| 1035 | * Called after mce notification in process context. This code | ||
| 1036 | * is allowed to sleep. Call the high level VM handler to process | ||
| 1037 | * any corrupted pages. | ||
| 1038 | * Assume that the work queue code only calls this one at a time | ||
| 1039 | * per CPU. | ||
| 1040 | * Note we don't disable preemption, so this code might run on the wrong | ||
| 1041 | * CPU. In this case the event is picked up by the scheduled work queue. | ||
| 1042 | * This is merely a fast path to expedite processing in some common | ||
| 1043 | * cases. | ||
| 1044 | */ | ||
| 1045 | void mce_notify_process(void) | ||
| 1046 | { | ||
| 1047 | unsigned long pfn; | ||
| 1048 | mce_notify_irq(); | ||
| 1049 | while (mce_ring_get(&pfn)) | ||
| 1050 | memory_failure(pfn, MCE_VECTOR); | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | static void mce_process_work(struct work_struct *dummy) | ||
| 1054 | { | ||
| 1055 | mce_notify_process(); | ||
| 1056 | } | ||
| 1057 | |||
| 1058 | #ifdef CONFIG_X86_MCE_INTEL | ||
| 1059 | /*** | ||
| 1060 | * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog | ||
| 1061 | * @cpu: The CPU on which the event occurred. | ||
| 1062 | * @status: Event status information | ||
| 1063 | * | ||
| 1064 | * This function should be called by the thermal interrupt after the | ||
| 1065 | * event has been processed and the decision was made to log the event | ||
| 1066 | * further. | ||
| 1067 | * | ||
| 1068 | * The status parameter will be saved to the 'status' field of 'struct mce' | ||
| 1069 | * and historically has been the register value of the | ||
| 1070 | * MSR_IA32_THERMAL_STATUS (Intel) msr. | ||
| 1071 | */ | ||
| 1072 | void mce_log_therm_throt_event(__u64 status) | ||
| 1073 | { | ||
| 1074 | struct mce m; | ||
| 1075 | |||
| 1076 | mce_setup(&m); | ||
| 1077 | m.bank = MCE_THERMAL_BANK; | ||
| 1078 | m.status = status; | ||
| 1079 | mce_log(&m); | ||
| 1080 | } | ||
| 1081 | #endif /* CONFIG_X86_MCE_INTEL */ | ||
| 1082 | |||
| 1083 | /* | ||
| 1084 | * Periodic polling timer for "silent" machine check errors. If the | ||
| 1085 | * poller finds an MCE, poll 2x faster. When the poller finds no more | ||
| 1086 | * errors, poll 2x slower (up to check_interval seconds). | ||
| 1087 | */ | ||
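| | /* | ||
| | * mcheck_timer() below halves the interval down to a floor of HZ/100 | ||
| | * jiffies (10ms) while events are found, and doubles it back up to | ||
| | * check_interval seconds when things are quiet. | ||
| | */ | ||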
| 1088 | static int check_interval = 5 * 60; /* 5 minutes */ | ||
| 1089 | |||
| 1090 | static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ | ||
| 1091 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | ||
| 1092 | |||
| 1093 | static void mcheck_timer(unsigned long data) | ||
| 1094 | { | ||
| 1095 | struct timer_list *t = &per_cpu(mce_timer, data); | ||
| 1096 | int *n; | ||
| 1097 | |||
| 1098 | WARN_ON(smp_processor_id() != data); | ||
| 1099 | |||
| 1100 | if (mce_available(¤t_cpu_data)) { | ||
| 1101 | machine_check_poll(MCP_TIMESTAMP, | ||
| 1102 | &__get_cpu_var(mce_poll_banks)); | ||
| 1103 | } | ||
| 1104 | |||
| 1105 | /* | ||
| 1106 | * Alert userspace if needed. If we logged an MCE, reduce the | ||
| 1107 | * polling interval, otherwise increase the polling interval. | ||
| 1108 | */ | ||
| 1109 | n = &__get_cpu_var(next_interval); | ||
| 1110 | if (mce_notify_irq()) | ||
| 1111 | *n = max(*n/2, HZ/100); | ||
| 1112 | else | ||
| 1113 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); | ||
| 1114 | |||
| 1115 | t->expires = jiffies + *n; | ||
| 1116 | add_timer(t); | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | static void mce_do_trigger(struct work_struct *work) | ||
| 1120 | { | ||
| 1121 | call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); | ||
| 1122 | } | ||
| 1123 | |||
| 1124 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | ||
| 1125 | |||
| 1126 | /* | ||
| 1127 | * Notify the user(s) about new machine check events. | ||
| 1128 | * Can be called from interrupt context, but not from machine check/NMI | ||
| 1129 | * context. | ||
| 1130 | */ | ||
| 1131 | int mce_notify_irq(void) | ||
| 1132 | { | ||
| 1133 | /* Not more than two messages every minute */ | ||
| 1134 | static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | ||
| 1135 | |||
| 1136 | clear_thread_flag(TIF_MCE_NOTIFY); | ||
| 1137 | |||
| 1138 | if (test_and_clear_bit(0, ¬ify_user)) { | ||
| 1139 | wake_up_interruptible(&mce_wait); | ||
| 1140 | |||
| 1141 | /* | ||
| 1142 | * There is no risk of missing notifications because | ||
| 1143 | * work_pending is always cleared before the function is | ||
| 1144 | * executed. | ||
| 1145 | */ | ||
| 1146 | if (trigger[0] && !work_pending(&mce_trigger_work)) | ||
| 1147 | schedule_work(&mce_trigger_work); | ||
| 1148 | |||
| 1149 | if (__ratelimit(&ratelimit)) | ||
| 1150 | printk(KERN_INFO "Machine check events logged\n"); | ||
| 1151 | |||
| 1152 | return 1; | ||
| 1153 | } | ||
| 1154 | return 0; | ||
| 1155 | } | ||
| 1156 | EXPORT_SYMBOL_GPL(mce_notify_irq); | ||
| 1157 | |||
| 1158 | /* | ||
| 1159 | * Initialize Machine Checks for a CPU. | ||
| 1160 | */ | ||
| 1161 | static int mce_cap_init(void) | ||
| 1162 | { | ||
| 1163 | unsigned b; | ||
| 1164 | u64 cap; | ||
| 1165 | |||
| 1166 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
| 1167 | |||
| 1168 | b = cap & MCG_BANKCNT_MASK; | ||
| 1169 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); | ||
| 1170 | |||
| 1171 | if (b > MAX_NR_BANKS) { | ||
| 1172 | printk(KERN_WARNING | ||
| 1173 | "MCE: Using only %u machine check banks out of %u\n", | ||
| 1174 | MAX_NR_BANKS, b); | ||
| 1175 | b = MAX_NR_BANKS; | ||
| 1176 | } | ||
| 1177 | |||
| 1178 | /* Don't support asymmetric configurations today */ | ||
| 1179 | WARN_ON(banks != 0 && b != banks); | ||
| 1180 | banks = b; | ||
| 1181 | if (!bank) { | ||
| 1182 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | ||
| 1183 | if (!bank) | ||
| 1184 | return -ENOMEM; | ||
| 1185 | memset(bank, 0xff, banks * sizeof(u64)); | ||
| 1186 | } | ||
| 1187 | |||
| 1188 | /* Use accurate RIP reporting if available. */ | ||
| 1189 | if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) | ||
| 1190 | rip_msr = MSR_IA32_MCG_EIP; | ||
| 1191 | |||
| 1192 | if (cap & MCG_SER_P) | ||
| 1193 | mce_ser = 1; | ||
| 1194 | |||
| 1195 | return 0; | ||
| 1196 | } | ||
| 1197 | |||
| 1198 | static void mce_init(void) | ||
| 1199 | { | ||
| 1200 | mce_banks_t all_banks; | ||
| 1201 | u64 cap; | ||
| 1202 | int i; | ||
| 1203 | |||
| 1204 | /* | ||
| 1205 | * Log the machine checks left over from the previous reset. | ||
| 1206 | */ | ||
| 1207 | bitmap_fill(all_banks, MAX_NR_BANKS); | ||
| 1208 | machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); | ||
| 1209 | |||
| 1210 | set_in_cr4(X86_CR4_MCE); | ||
| 1211 | |||
| 1212 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
| 1213 | if (cap & MCG_CTL_P) | ||
| 1214 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
| 1215 | |||
| 1216 | for (i = 0; i < banks; i++) { | ||
| 1217 | if (skip_bank_init(i)) | ||
| 1218 | continue; | ||
| 1219 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | ||
| 1220 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
| 1221 | } | ||
| 1222 | } | ||
| 1223 | |||
| 1224 | /* Add per CPU specific workarounds here */ | ||
| 1225 | static void mce_cpu_quirks(struct cpuinfo_x86 *c) | ||
| 1226 | { | ||
| 1227 | /* This should be disabled by the BIOS, but isn't always */ | ||
| 1228 | if (c->x86_vendor == X86_VENDOR_AMD) { | ||
| 1229 | if (c->x86 == 15 && banks > 4) { | ||
| 1230 | /* | ||
| 1231 | * disable GART TBL walk error reporting, which | ||
| 1232 | * trips off incorrectly with the IOMMU & 3ware | ||
| 1233 | * & Cerberus: | ||
| 1234 | */ | ||
| 1235 | clear_bit(10, (unsigned long *)&bank[4]); | ||
| 1236 | } | ||
| 1237 | if (c->x86 <= 17 && mce_bootlog < 0) { | ||
| 1238 | /* | ||
| 1239 | * Lots of broken BIOSes around that don't clear them | ||
| 1240 | * by default and leave crap in there. Don't log: | ||
| 1241 | */ | ||
| 1242 | mce_bootlog = 0; | ||
| 1243 | } | ||
| 1244 | /* | ||
| 1245 | * Various K7s with broken bank 0 around. Always disable | ||
| 1246 | * by default. | ||
| 1247 | */ | ||
| 1248 | if (c->x86 == 6) | ||
| 1249 | bank[0] = 0; | ||
| 1250 | } | ||
| 1251 | |||
| 1252 | if (c->x86_vendor == X86_VENDOR_INTEL) { | ||
| 1253 | /* | ||
| 1254 | * SDM documents that on family 6 bank 0 should not be written | ||
| 1255 | * because it aliases to another special BIOS controlled | ||
| 1256 | * register. | ||
| 1257 | * But it is not aliased anymore on model 0x1a+. | ||
| 1258 | * Don't ignore bank 0 completely because there could be a | ||
| 1259 | * valid event later; merely don't write CTL0. | ||
| 1260 | */ | ||
| 1261 | |||
| 1262 | if (c->x86 == 6 && c->x86_model < 0x1A) | ||
| 1263 | __set_bit(0, &dont_init_banks); | ||
| 1264 | |||
| 1265 | /* | ||
| 1266 | * All newer Intel systems support MCE broadcasting. Enable | ||
| 1267 | * synchronization with a one second timeout. | ||
| 1268 | */ | ||
| 1269 | if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && | ||
| 1270 | monarch_timeout < 0) | ||
| 1271 | monarch_timeout = USEC_PER_SEC; | ||
| 1272 | } | ||
| 1273 | if (monarch_timeout < 0) | ||
| 1274 | monarch_timeout = 0; | ||
| 1275 | if (mce_bootlog != 0) | ||
| 1276 | mce_panic_timeout = 30; | ||
| 1277 | } | ||
| 1278 | |||
| 1279 | static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | ||
| 1280 | { | ||
| 1281 | if (c->x86 != 5) | ||
| 1282 | return; | ||
| 1283 | switch (c->x86_vendor) { | ||
| 1284 | case X86_VENDOR_INTEL: | ||
| 1285 | if (mce_p5_enabled()) | ||
| 1286 | intel_p5_mcheck_init(c); | ||
| 1287 | break; | ||
| 1288 | case X86_VENDOR_CENTAUR: | ||
| 1289 | winchip_mcheck_init(c); | ||
| 1290 | break; | ||
| 1291 | } | ||
| 1292 | } | ||
| 1293 | |||
| 1294 | static void mce_cpu_features(struct cpuinfo_x86 *c) | ||
| 1295 | { | ||
| 1296 | switch (c->x86_vendor) { | ||
| 1297 | case X86_VENDOR_INTEL: | ||
| 1298 | mce_intel_feature_init(c); | ||
| 1299 | break; | ||
| 1300 | case X86_VENDOR_AMD: | ||
| 1301 | mce_amd_feature_init(c); | ||
| 1302 | break; | ||
| 1303 | default: | ||
| 1304 | break; | ||
| 1305 | } | ||
| 1306 | } | ||
| 1307 | |||
| 1308 | static void mce_init_timer(void) | ||
| 1309 | { | ||
| 1310 | struct timer_list *t = &__get_cpu_var(mce_timer); | ||
| 1311 | int *n = &__get_cpu_var(next_interval); | ||
| 1312 | |||
| 1313 | if (mce_ignore_ce) | ||
| 1314 | return; | ||
| 1315 | |||
| 1316 | *n = check_interval * HZ; | ||
| 1317 | if (!*n) | ||
| 1318 | return; | ||
| 1319 | setup_timer(t, mcheck_timer, smp_processor_id()); | ||
| 1320 | t->expires = round_jiffies(jiffies + *n); | ||
| 1321 | add_timer(t); | ||
| 1322 | } | ||
| 1323 | |||
| 1324 | /* | ||
| 1325 | * Called for each booted CPU to set up machine checks. | ||
| 1326 | * Must be called with preempt off: | ||
| 1327 | */ | ||
| 1328 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | ||
| 1329 | { | ||
| 1330 | if (mce_disabled) | ||
| 1331 | return; | ||
| 1332 | |||
| 1333 | mce_ancient_init(c); | ||
| 1334 | |||
| 1335 | if (!mce_available(c)) | ||
| 1336 | return; | ||
| 1337 | |||
| 1338 | if (mce_cap_init() < 0) { | ||
| 1339 | mce_disabled = 1; | ||
| 1340 | return; | ||
| 1341 | } | ||
| 1342 | mce_cpu_quirks(c); | ||
| 1343 | |||
| 1344 | machine_check_vector = do_machine_check; | ||
| 1345 | |||
| 1346 | mce_init(); | ||
| 1347 | mce_cpu_features(c); | ||
| 1348 | mce_init_timer(); | ||
| 1349 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); | ||
| 1350 | } | ||
| 1351 | |||
| 1352 | /* | ||
| 1353 | * Character device to read and clear the MCE log. | ||
| 1354 | */ | ||
| 1355 | |||
| 1356 | static DEFINE_SPINLOCK(mce_state_lock); | ||
| 1357 | static int open_count; /* #times opened */ | ||
| 1358 | static int open_exclu; /* already open exclusive? */ | ||
| 1359 | |||
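| | /* | ||
| | * An open with O_EXCL fails if the device is already open, and an | ||
| | * exclusive open blocks all later opens until it is released. | ||
| | */ | ||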
| 1360 | static int mce_open(struct inode *inode, struct file *file) | ||
| 1361 | { | ||
| 1362 | spin_lock(&mce_state_lock); | ||
| 1363 | |||
| 1364 | if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { | ||
| 1365 | spin_unlock(&mce_state_lock); | ||
| 1366 | |||
| 1367 | return -EBUSY; | ||
| 1368 | } | ||
| 1369 | |||
| 1370 | if (file->f_flags & O_EXCL) | ||
| 1371 | open_exclu = 1; | ||
| 1372 | open_count++; | ||
| 1373 | |||
| 1374 | spin_unlock(&mce_state_lock); | ||
| 1375 | |||
| 1376 | return nonseekable_open(inode, file); | ||
| 1377 | } | ||
| 1378 | |||
| 1379 | static int mce_release(struct inode *inode, struct file *file) | ||
| 1380 | { | ||
| 1381 | spin_lock(&mce_state_lock); | ||
| 1382 | |||
| 1383 | open_count--; | ||
| 1384 | open_exclu = 0; | ||
| 1385 | |||
| 1386 | spin_unlock(&mce_state_lock); | ||
| 1387 | |||
| 1388 | return 0; | ||
| 1389 | } | ||
| 1390 | |||
| 1391 | static void collect_tscs(void *data) | ||
| 1392 | { | ||
| 1393 | unsigned long *cpu_tsc = (unsigned long *)data; | ||
| 1394 | |||
| 1395 | rdtscll(cpu_tsc[smp_processor_id()]); | ||
| 1396 | } | ||
| 1397 | |||
| 1398 | static DEFINE_MUTEX(mce_read_mutex); | ||
| 1399 | |||
| 1400 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | ||
| 1401 | loff_t *off) | ||
| 1402 | { | ||
| 1403 | char __user *buf = ubuf; | ||
| 1404 | unsigned long *cpu_tsc; | ||
| 1405 | unsigned prev, next; | ||
| 1406 | int i, err; | ||
| 1407 | |||
| 1408 | cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); | ||
| 1409 | if (!cpu_tsc) | ||
| 1410 | return -ENOMEM; | ||
| 1411 | |||
| 1412 | mutex_lock(&mce_read_mutex); | ||
| 1413 | next = rcu_dereference(mcelog.next); | ||
| 1414 | |||
| 1415 | /* Only supports full reads right now */ | ||
| 1416 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | ||
| 1417 | mutex_unlock(&mce_read_mutex); | ||
| 1418 | kfree(cpu_tsc); | ||
| 1419 | |||
| 1420 | return -EINVAL; | ||
| 1421 | } | ||
| 1422 | |||
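| | /* | ||
| | * First pass: copy out entries [0, next) and retry until mcelog.next | ||
| | * can be atomically reset to 0. Entries still being written are | ||
| | * zeroed after a short timeout. A second pass below picks up records | ||
| | * that completed in the meantime, identified by a TSC older than the | ||
| | * per-CPU snapshot taken after synchronize_sched(). | ||
| | */ | ||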
| 1423 | err = 0; | ||
| 1424 | prev = 0; | ||
| 1425 | do { | ||
| 1426 | for (i = prev; i < next; i++) { | ||
| 1427 | unsigned long start = jiffies; | ||
| 1428 | |||
| 1429 | while (!mcelog.entry[i].finished) { | ||
| 1430 | if (time_after_eq(jiffies, start + 2)) { | ||
| 1431 | memset(mcelog.entry + i, 0, | ||
| 1432 | sizeof(struct mce)); | ||
| 1433 | goto timeout; | ||
| 1434 | } | ||
| 1435 | cpu_relax(); | ||
| 1436 | } | ||
| 1437 | smp_rmb(); | ||
| 1438 | err |= copy_to_user(buf, mcelog.entry + i, | ||
| 1439 | sizeof(struct mce)); | ||
| 1440 | buf += sizeof(struct mce); | ||
| 1441 | timeout: | ||
| 1442 | ; | ||
| 1443 | } | ||
| 1444 | |||
| 1445 | memset(mcelog.entry + prev, 0, | ||
| 1446 | (next - prev) * sizeof(struct mce)); | ||
| 1447 | prev = next; | ||
| 1448 | next = cmpxchg(&mcelog.next, prev, 0); | ||
| 1449 | } while (next != prev); | ||
| 1450 | |||
| 1451 | synchronize_sched(); | ||
| 1452 | |||
| 1453 | /* | ||
| 1454 | * Collect entries that were still getting written before the | ||
| 1455 | * synchronize. | ||
| 1456 | */ | ||
| 1457 | on_each_cpu(collect_tscs, cpu_tsc, 1); | ||
| 1458 | |||
| 1459 | for (i = next; i < MCE_LOG_LEN; i++) { | ||
| 1460 | if (mcelog.entry[i].finished && | ||
| 1461 | mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { | ||
| 1462 | err |= copy_to_user(buf, mcelog.entry+i, | ||
| 1463 | sizeof(struct mce)); | ||
| 1464 | smp_rmb(); | ||
| 1465 | buf += sizeof(struct mce); | ||
| 1466 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | ||
| 1467 | } | ||
| 1468 | } | ||
| 1469 | mutex_unlock(&mce_read_mutex); | ||
| 1470 | kfree(cpu_tsc); | ||
| 1471 | |||
| 1472 | return err ? -EFAULT : buf - ubuf; | ||
| 1473 | } | ||
| 1474 | |||
| 1475 | static unsigned int mce_poll(struct file *file, poll_table *wait) | ||
| 1476 | { | ||
| 1477 | poll_wait(file, &mce_wait, wait); | ||
| 1478 | if (rcu_dereference(mcelog.next)) | ||
| 1479 | return POLLIN | POLLRDNORM; | ||
| 1480 | return 0; | ||
| 1481 | } | ||
| 1482 | |||
| 1483 | static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) | ||
| 1484 | { | ||
| 1485 | int __user *p = (int __user *)arg; | ||
| 1486 | |||
| 1487 | if (!capable(CAP_SYS_ADMIN)) | ||
| 1488 | return -EPERM; | ||
| 1489 | |||
| 1490 | switch (cmd) { | ||
| 1491 | case MCE_GET_RECORD_LEN: | ||
| 1492 | return put_user(sizeof(struct mce), p); | ||
| 1493 | case MCE_GET_LOG_LEN: | ||
| 1494 | return put_user(MCE_LOG_LEN, p); | ||
| 1495 | case MCE_GETCLEAR_FLAGS: { | ||
| 1496 | unsigned flags; | ||
| 1497 | |||
| 1498 | do { | ||
| 1499 | flags = mcelog.flags; | ||
| 1500 | } while (cmpxchg(&mcelog.flags, flags, 0) != flags); | ||
| 1501 | |||
| 1502 | return put_user(flags, p); | ||
| 1503 | } | ||
| 1504 | default: | ||
| 1505 | return -ENOTTY; | ||
| 1506 | } | ||
| 1507 | } | ||
| 1508 | |||
| 1509 | /* Modified in mce-inject.c, so not static or const */ | ||
| 1510 | struct file_operations mce_chrdev_ops = { | ||
| 1511 | .open = mce_open, | ||
| 1512 | .release = mce_release, | ||
| 1513 | .read = mce_read, | ||
| 1514 | .poll = mce_poll, | ||
| 1515 | .unlocked_ioctl = mce_ioctl, | ||
| 1516 | }; | ||
| 1517 | EXPORT_SYMBOL_GPL(mce_chrdev_ops); | ||
| 1518 | |||
| 1519 | static struct miscdevice mce_log_device = { | ||
| 1520 | MISC_MCELOG_MINOR, | ||
| 1521 | "mcelog", | ||
| 1522 | &mce_chrdev_ops, | ||
| 1523 | }; | ||
| 1524 | |||
| 1525 | /* | ||
| 1526 | * mce=off Disables machine check | ||
| 1527 | * mce=no_cmci Disables CMCI | ||
| 1528 | * mce=dont_log_ce Clears corrected events silently, no log created for CEs. | ||
| 1529 | * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. | ||
| 1530 | * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) | ||
| 1531 | * monarchtimeout is how long to wait for other CPUs on machine | ||
| 1532 | * check, or 0 to not wait | ||
| 1533 | * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. | ||
| 1534 | * mce=nobootlog Don't log MCEs from before booting. | ||
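| | * Example: mce=1,1000000 sets tolerant level 1 and waits up to one | ||
| | * second (the timeout is given in microseconds) for other CPUs. | ||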
| 1535 | */ | ||
| 1536 | static int __init mcheck_enable(char *str) | ||
| 1537 | { | ||
| 1538 | if (*str == 0) | ||
| 1539 | enable_p5_mce(); | ||
| 1540 | if (*str == '=') | ||
| 1541 | str++; | ||
| 1542 | if (!strcmp(str, "off")) | ||
| 1543 | mce_disabled = 1; | ||
| 1544 | else if (!strcmp(str, "no_cmci")) | ||
| 1545 | mce_cmci_disabled = 1; | ||
| 1546 | else if (!strcmp(str, "dont_log_ce")) | ||
| 1547 | mce_dont_log_ce = 1; | ||
| 1548 | else if (!strcmp(str, "ignore_ce")) | ||
| 1549 | mce_ignore_ce = 1; | ||
| 1550 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) | ||
| 1551 | mce_bootlog = (str[0] == 'b'); | ||
| 1552 | else if (isdigit(str[0])) { | ||
| 1553 | get_option(&str, &tolerant); | ||
| 1554 | if (*str == ',') { | ||
| 1555 | ++str; | ||
| 1556 | get_option(&str, &monarch_timeout); | ||
| 1557 | } | ||
| 1558 | } else { | ||
| 1559 | printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", | ||
| 1560 | str); | ||
| 1561 | return 0; | ||
| 1562 | } | ||
| 1563 | return 1; | ||
| 1564 | } | ||
| 1565 | __setup("mce", mcheck_enable); | ||
| 1566 | |||
| 1567 | /* | ||
| 1568 | * Sysfs support | ||
| 1569 | */ | ||
| 1570 | |||
| 1571 | /* | ||
| 1572 | * Disable machine checks on suspend and shutdown. We can't really handle | ||
| 1573 | * them later. | ||
| 1574 | */ | ||
| 1575 | static int mce_disable(void) | ||
| 1576 | { | ||
| 1577 | int i; | ||
| 1578 | |||
| 1579 | for (i = 0; i < banks; i++) { | ||
| 1580 | if (!skip_bank_init(i)) | ||
| 1581 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
| 1582 | } | ||
| 1583 | return 0; | ||
| 1584 | } | ||
| 1585 | |||
| 1586 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | ||
| 1587 | { | ||
| 1588 | return mce_disable(); | ||
| 1589 | } | ||
| 1590 | |||
| 1591 | static int mce_shutdown(struct sys_device *dev) | ||
| 1592 | { | ||
| 1593 | return mce_disable(); | ||
| 1594 | } | ||
| 1595 | |||
| 1596 | /* | ||
| 1597 | * On resume clear all MCE state. Don't want to see leftovers from the BIOS. | ||
| 1598 | * Only one CPU is active at this time, the others get re-added later using | ||
| 1599 | * CPU hotplug: | ||
| 1600 | */ | ||
| 1601 | static int mce_resume(struct sys_device *dev) | ||
| 1602 | { | ||
| 1603 | mce_init(); | ||
| 1604 | mce_cpu_features(¤t_cpu_data); | ||
| 1605 | |||
| 1606 | return 0; | ||
| 1607 | } | ||
| 1608 | |||
| 1609 | static void mce_cpu_restart(void *data) | ||
| 1610 | { | ||
| 1611 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
| 1612 | if (mce_available(¤t_cpu_data)) | ||
| 1613 | mce_init(); | ||
| 1614 | mce_init_timer(); | ||
| 1615 | } | ||
| 1616 | |||
| 1617 | /* Reinit MCEs after user configuration changes */ | ||
| 1618 | static void mce_restart(void) | ||
| 1619 | { | ||
| 1620 | on_each_cpu(mce_cpu_restart, NULL, 1); | ||
| 1621 | } | ||
| 1622 | |||
| 1623 | static struct sysdev_class mce_sysclass = { | ||
| 1624 | .suspend = mce_suspend, | ||
| 1625 | .shutdown = mce_shutdown, | ||
| 1626 | .resume = mce_resume, | ||
| 1627 | .name = "machinecheck", | ||
| 1628 | }; | ||
| 1629 | |||
| 1630 | DEFINE_PER_CPU(struct sys_device, mce_dev); | ||
| 1631 | |||
| 1632 | __cpuinitdata | ||
| 1633 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | ||
| 1634 | |||
| 1635 | static struct sysdev_attribute *bank_attrs; | ||
| 1636 | |||
| 1637 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | ||
| 1638 | char *buf) | ||
| 1639 | { | ||
| 1640 | u64 b = bank[attr - bank_attrs]; | ||
| 1641 | |||
| 1642 | return sprintf(buf, "%llx\n", b); | ||
| 1643 | } | ||
| 1644 | |||
| 1645 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | ||
| 1646 | const char *buf, size_t size) | ||
| 1647 | { | ||
| 1648 | u64 new; | ||
| 1649 | |||
| 1650 | if (strict_strtoull(buf, 0, &new) < 0) | ||
| 1651 | return -EINVAL; | ||
| 1652 | |||
| 1653 | bank[attr - bank_attrs] = new; | ||
| 1654 | mce_restart(); | ||
| 1655 | |||
| 1656 | return size; | ||
| 1657 | } | ||
| 1658 | |||
| 1659 | static ssize_t | ||
| 1660 | show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) | ||
| 1661 | { | ||
| 1662 | strcpy(buf, trigger); | ||
| 1663 | strcat(buf, "\n"); | ||
| 1664 | return strlen(trigger) + 1; | ||
| 1665 | } | ||
| 1666 | |||
| 1667 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | ||
| 1668 | const char *buf, size_t siz) | ||
| 1669 | { | ||
| 1670 | char *p; | ||
| 1671 | int len; | ||
| 1672 | |||
| 1673 | strncpy(trigger, buf, sizeof(trigger)); | ||
| 1674 | trigger[sizeof(trigger)-1] = 0; | ||
| 1675 | len = strlen(trigger); | ||
| 1676 | p = strchr(trigger, '\n'); | ||
| 1677 | |||
| 1678 | if (p) | ||
| 1679 | *p = 0; | ||
| 1680 | |||
| 1681 | return len; | ||
| 1682 | } | ||
| 1683 | |||
| 1684 | static ssize_t store_int_with_restart(struct sys_device *s, | ||
| 1685 | struct sysdev_attribute *attr, | ||
| 1686 | const char *buf, size_t size) | ||
| 1687 | { | ||
| 1688 | ssize_t ret = sysdev_store_int(s, attr, buf, size); | ||
| 1689 | mce_restart(); | ||
| 1690 | return ret; | ||
| 1691 | } | ||
| 1692 | |||
| 1693 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | ||
| 1694 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | ||
| 1695 | static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); | ||
| 1696 | |||
| 1697 | static struct sysdev_ext_attribute attr_check_interval = { | ||
| 1698 | _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, | ||
| 1699 | store_int_with_restart), | ||
| 1700 | &check_interval | ||
| 1701 | }; | ||
| 1702 | |||
| 1703 | static struct sysdev_attribute *mce_attrs[] = { | ||
| 1704 | &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, | ||
| 1705 | &attr_monarch_timeout.attr, | ||
| 1706 | NULL | ||
| 1707 | }; | ||
| 1708 | |||
| 1709 | static cpumask_var_t mce_dev_initialized; | ||
| 1710 | |||
| 1711 | /* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ | ||
| 1712 | static __cpuinit int mce_create_device(unsigned int cpu) | ||
| 1713 | { | ||
| 1714 | int err; | ||
| 1715 | int i; | ||
| 1716 | |||
| 1717 | if (!mce_available(&boot_cpu_data)) | ||
| 1718 | return -EIO; | ||
| 1719 | |||
| 1720 | memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject)); | ||
| 1721 | per_cpu(mce_dev, cpu).id = cpu; | ||
| 1722 | per_cpu(mce_dev, cpu).cls = &mce_sysclass; | ||
| 1723 | |||
| 1724 | err = sysdev_register(&per_cpu(mce_dev, cpu)); | ||
| 1725 | if (err) | ||
| 1726 | return err; | ||
| 1727 | |||
| 1728 | for (i = 0; mce_attrs[i]; i++) { | ||
| 1729 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | ||
| 1730 | if (err) | ||
| 1731 | goto error; | ||
| 1732 | } | ||
| 1733 | for (i = 0; i < banks; i++) { | ||
| 1734 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), | ||
| 1735 | &bank_attrs[i]); | ||
| 1736 | if (err) | ||
| 1737 | goto error2; | ||
| 1738 | } | ||
| 1739 | cpumask_set_cpu(cpu, mce_dev_initialized); | ||
| 1740 | |||
| 1741 | return 0; | ||
| 1742 | error2: | ||
| 1743 | while (--i >= 0) | ||
| 1744 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); | ||
| 1745 | error: | ||
| 1746 | while (--i >= 0) | ||
| 1747 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | ||
| 1748 | |||
| 1749 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | ||
| 1750 | |||
| 1751 | return err; | ||
| 1752 | } | ||
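
The error paths above follow the usual reverse-unwind idiom: when the i-th registration fails, `--i` first skips the failing index and then every earlier success is undone in reverse order before the device itself is unregistered. (The two-stage version above reuses i, so after the bank unwind the attribute files are presumably left for sysdev_unregister() to tear down with the kobject.) The shape of the single-stage pattern, reduced to a sketch with hypothetical helpers:

    extern int register_one(int i);         /* hypothetical */
    extern void unregister_one(int i);      /* hypothetical */

    static int register_all(int n)
    {
            int i, err;

            for (i = 0; i < n; i++) {
                    err = register_one(i);
                    if (err)
                            goto unwind;
            }
            return 0;
    unwind:
            while (--i >= 0)                /* skips the failed index */
                    unregister_one(i);
            return err;
    }
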
| 1753 | |||
| 1754 | static __cpuinit void mce_remove_device(unsigned int cpu) | ||
| 1755 | { | ||
| 1756 | int i; | ||
| 1757 | |||
| 1758 | if (!cpumask_test_cpu(cpu, mce_dev_initialized)) | ||
| 1759 | return; | ||
| 1760 | |||
| 1761 | for (i = 0; mce_attrs[i]; i++) | ||
| 1762 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | ||
| 1763 | |||
| 1764 | for (i = 0; i < banks; i++) | ||
| 1765 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); | ||
| 1766 | |||
| 1767 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | ||
| 1768 | cpumask_clear_cpu(cpu, mce_dev_initialized); | ||
| 1769 | } | ||
| 1770 | |||
| 1771 | /* Make sure there are no machine checks on offlined CPUs. */ | ||
| 1772 | static void mce_disable_cpu(void *h) | ||
| 1773 | { | ||
| 1774 | unsigned long action = *(unsigned long *)h; | ||
| 1775 | int i; | ||
| 1776 | |||
| 1777 | if (!mce_available(&current_cpu_data)) | ||
| 1778 | return; | ||
| 1779 | if (!(action & CPU_TASKS_FROZEN)) | ||
| 1780 | cmci_clear(); | ||
| 1781 | for (i = 0; i < banks; i++) { | ||
| 1782 | if (!skip_bank_init(i)) | ||
| 1783 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
| 1784 | } | ||
| 1785 | } | ||
| 1786 | |||
| 1787 | static void mce_reenable_cpu(void *h) | ||
| 1788 | { | ||
| 1789 | unsigned long action = *(unsigned long *)h; | ||
| 1790 | int i; | ||
| 1791 | |||
| 1792 | if (!mce_available(&current_cpu_data)) | ||
| 1793 | return; | ||
| 1794 | |||
| 1795 | if (!(action & CPU_TASKS_FROZEN)) | ||
| 1796 | cmci_reenable(); | ||
| 1797 | for (i = 0; i < banks; i++) { | ||
| 1798 | if (!skip_bank_init(i)) | ||
| 1799 | wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); | ||
| 1800 | } | ||
| 1801 | } | ||
| 1802 | |||
| 1803 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | ||
| 1804 | static int __cpuinit | ||
| 1805 | mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | ||
| 1806 | { | ||
| 1807 | unsigned int cpu = (unsigned long)hcpu; | ||
| 1808 | struct timer_list *t = &per_cpu(mce_timer, cpu); | ||
| 1809 | |||
| 1810 | switch (action) { | ||
| 1811 | case CPU_ONLINE: | ||
| 1812 | case CPU_ONLINE_FROZEN: | ||
| 1813 | mce_create_device(cpu); | ||
| 1814 | if (threshold_cpu_callback) | ||
| 1815 | threshold_cpu_callback(action, cpu); | ||
| 1816 | break; | ||
| 1817 | case CPU_DEAD: | ||
| 1818 | case CPU_DEAD_FROZEN: | ||
| 1819 | if (threshold_cpu_callback) | ||
| 1820 | threshold_cpu_callback(action, cpu); | ||
| 1821 | mce_remove_device(cpu); | ||
| 1822 | break; | ||
| 1823 | case CPU_DOWN_PREPARE: | ||
| 1824 | case CPU_DOWN_PREPARE_FROZEN: | ||
| 1825 | del_timer_sync(t); | ||
| 1826 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | ||
| 1827 | break; | ||
| 1828 | case CPU_DOWN_FAILED: | ||
| 1829 | case CPU_DOWN_FAILED_FROZEN: | ||
| 1830 | t->expires = round_jiffies(jiffies + | ||
| 1831 | __get_cpu_var(next_interval)); | ||
| 1832 | add_timer_on(t, cpu); | ||
| 1833 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | ||
| 1834 | break; | ||
| 1835 | case CPU_POST_DEAD: | ||
| 1836 | /* intentionally ignoring frozen here */ | ||
| 1837 | cmci_rediscover(cpu); | ||
| 1838 | break; | ||
| 1839 | } | ||
| 1840 | return NOTIFY_OK; | ||
| 1841 | } | ||
| 1842 | |||
| 1843 | static struct notifier_block mce_cpu_notifier __cpuinitdata = { | ||
| 1844 | .notifier_call = mce_cpu_callback, | ||
| 1845 | }; | ||
| 1846 | |||
| 1847 | static __init int mce_init_banks(void) | ||
| 1848 | { | ||
| 1849 | int i; | ||
| 1850 | |||
| 1851 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | ||
| 1852 | GFP_KERNEL); | ||
| 1853 | if (!bank_attrs) | ||
| 1854 | return -ENOMEM; | ||
| 1855 | |||
| 1856 | for (i = 0; i < banks; i++) { | ||
| 1857 | struct sysdev_attribute *a = &bank_attrs[i]; | ||
| 1858 | |||
| 1859 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | ||
| 1860 | if (!a->attr.name) | ||
| 1861 | goto nomem; | ||
| 1862 | |||
| 1863 | a->attr.mode = 0644; | ||
| 1864 | a->show = show_bank; | ||
| 1865 | a->store = set_bank; | ||
| 1866 | } | ||
| 1867 | return 0; | ||
| 1868 | |||
| 1869 | nomem: | ||
| 1870 | while (--i >= 0) | ||
| 1871 | kfree(bank_attrs[i].attr.name); | ||
| 1872 | kfree(bank_attrs); | ||
| 1873 | bank_attrs = NULL; | ||
| 1874 | |||
| 1875 | return -ENOMEM; | ||
| 1876 | } | ||
| 1877 | |||
| 1878 | static __init int mce_init_device(void) | ||
| 1879 | { | ||
| 1880 | int err; | ||
| 1881 | int i = 0; | ||
| 1882 | |||
| 1883 | if (!mce_available(&boot_cpu_data)) | ||
| 1884 | return -EIO; | ||
| 1885 | |||
| 1886 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); | ||
| 1887 | |||
| 1888 | err = mce_init_banks(); | ||
| 1889 | if (err) | ||
| 1890 | return err; | ||
| 1891 | |||
| 1892 | err = sysdev_class_register(&mce_sysclass); | ||
| 1893 | if (err) | ||
| 1894 | return err; | ||
| 1895 | |||
| 1896 | for_each_online_cpu(i) { | ||
| 1897 | err = mce_create_device(i); | ||
| 1898 | if (err) | ||
| 1899 | return err; | ||
| 1900 | } | ||
| 1901 | |||
| 1902 | register_hotcpu_notifier(&mce_cpu_notifier); | ||
| 1903 | misc_register(&mce_log_device); | ||
| 1904 | |||
| 1905 | return err; | ||
| 1906 | } | ||
| 1907 | |||
| 1908 | device_initcall(mce_init_device); | ||
| 1909 | |||
| 1910 | #else /* CONFIG_X86_OLD_MCE: */ | ||
| 1911 | |||
| 1912 | int nr_mce_banks; | ||
| 1913 | EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | ||
| 1914 | |||
| 1915 | /* This has to be run for each processor */ | ||
| 1916 | void mcheck_init(struct cpuinfo_x86 *c) | ||
| 1917 | { | ||
| 1918 | if (mce_disabled == 1) | ||
| 1919 | return; | ||
| 1920 | |||
| 1921 | switch (c->x86_vendor) { | ||
| 1922 | case X86_VENDOR_AMD: | ||
| 1923 | amd_mcheck_init(c); | ||
| 1924 | break; | ||
| 1925 | |||
| 1926 | case X86_VENDOR_INTEL: | ||
| 1927 | if (c->x86 == 5) | ||
| 1928 | intel_p5_mcheck_init(c); | ||
| 1929 | if (c->x86 == 6) | ||
| 1930 | intel_p6_mcheck_init(c); | ||
| 1931 | if (c->x86 == 15) | ||
| 1932 | intel_p4_mcheck_init(c); | ||
| 1933 | break; | ||
| 1934 | |||
| 1935 | case X86_VENDOR_CENTAUR: | ||
| 1936 | if (c->x86 == 5) | ||
| 1937 | winchip_mcheck_init(c); | ||
| 1938 | break; | ||
| 1939 | |||
| 1940 | default: | ||
| 1941 | break; | ||
| 1942 | } | ||
| 1943 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); | ||
| 1944 | } | ||
| 1945 | |||
| 1946 | static int __init mcheck_enable(char *str) | ||
| 1947 | { | ||
| 1948 | mce_disabled = -1; | ||
| 1949 | return 1; | ||
| 1950 | } | ||
| 1951 | |||
| 1952 | __setup("mce", mcheck_enable); | ||
| 1953 | |||
| 1954 | #endif /* CONFIG_X86_OLD_MCE */ | ||
| 1955 | |||
| 1956 | /* | ||
| 1957 | * Old style boot options parsing. Only for compatibility. | ||
| 1958 | */ | ||
| 1959 | static int __init mcheck_disable(char *str) | ||
| 1960 | { | ||
| 1961 | mce_disabled = 1; | ||
| 1962 | return 1; | ||
| 1963 | } | ||
| 1964 | __setup("nomce", mcheck_disable); | ||
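
Both handlers use the __setup() early-parameter mechanism: the callback fires while the kernel command line is parsed, well before initcalls, and returning 1 marks the option as consumed. A self-contained sketch of the pattern (the option name is invented):

    #include <linux/init.h>

    static int myfeature_off;

    static int __init myfeature_off_setup(char *str)
    {
            myfeature_off = 1;
            return 1;       /* consumed; don't pass on to init */
    }
    __setup("nomyfeature", myfeature_off_setup);
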
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h index ae9f628838f1..84a552b458c8 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ b/arch/x86/kernel/cpu/mcheck/mce.h | |||
| @@ -1,14 +1,38 @@ | |||
| 1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
| 2 | #include <asm/mce.h> | 2 | #include <asm/mce.h> |
| 3 | 3 | ||
| 4 | #ifdef CONFIG_X86_OLD_MCE | ||
| 4 | void amd_mcheck_init(struct cpuinfo_x86 *c); | 5 | void amd_mcheck_init(struct cpuinfo_x86 *c); |
| 5 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | 6 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); |
| 6 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c); | ||
| 7 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c); | 7 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c); |
| 8 | #endif | ||
| 9 | |||
| 10 | #ifdef CONFIG_X86_ANCIENT_MCE | ||
| 11 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c); | ||
| 8 | void winchip_mcheck_init(struct cpuinfo_x86 *c); | 12 | void winchip_mcheck_init(struct cpuinfo_x86 *c); |
| 13 | extern int mce_p5_enable; | ||
| 14 | static inline int mce_p5_enabled(void) { return mce_p5_enable; } | ||
| 15 | static inline void enable_p5_mce(void) { mce_p5_enable = 1; } | ||
| 16 | #else | ||
| 17 | static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} | ||
| 18 | static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} | ||
| 19 | static inline int mce_p5_enabled(void) { return 0; } | ||
| 20 | static inline void enable_p5_mce(void) { } | ||
| 21 | #endif | ||
| 9 | 22 | ||
| 10 | /* Call the installed machine check handler for this CPU setup. */ | 23 | /* Call the installed machine check handler for this CPU setup. */ |
| 11 | extern void (*machine_check_vector)(struct pt_regs *, long error_code); | 24 | extern void (*machine_check_vector)(struct pt_regs *, long error_code); |
| 12 | 25 | ||
| 26 | #ifdef CONFIG_X86_OLD_MCE | ||
| 27 | |||
| 13 | extern int nr_mce_banks; | 28 | extern int nr_mce_banks; |
| 14 | 29 | ||
| 30 | void intel_set_thermal_handler(void); | ||
| 31 | |||
| 32 | #else | ||
| 33 | |||
| 34 | static inline void intel_set_thermal_handler(void) { } | ||
| 35 | |||
| 36 | #endif | ||
| 37 | |||
| 38 | void intel_init_thermal(struct cpuinfo_x86 *c); | ||
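
The reworked header is a textbook example of the config-stub idiom: when a feature is compiled out, its entry points collapse to empty static inlines, so callers stay free of #ifdef clutter and the optimizer removes the calls entirely. Reduced to its essence (the feature name is invented):

    struct cpuinfo_x86;

    #ifdef CONFIG_MY_FEATURE
    void my_feature_init(struct cpuinfo_x86 *c);
    #else
    static inline void my_feature_init(struct cpuinfo_x86 *c) { }
    #endif
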
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c deleted file mode 100644 index 3552119b091d..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ /dev/null | |||
| @@ -1,76 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * mce.c - x86 Machine Check Exception Reporting | ||
| 3 | * (c) 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>, Dave Jones <davej@redhat.com> | ||
| 4 | */ | ||
| 5 | |||
| 6 | #include <linux/init.h> | ||
| 7 | #include <linux/types.h> | ||
| 8 | #include <linux/kernel.h> | ||
| 9 | #include <linux/module.h> | ||
| 10 | #include <linux/smp.h> | ||
| 11 | #include <linux/thread_info.h> | ||
| 12 | |||
| 13 | #include <asm/processor.h> | ||
| 14 | #include <asm/system.h> | ||
| 15 | #include <asm/mce.h> | ||
| 16 | |||
| 17 | #include "mce.h" | ||
| 18 | |||
| 19 | int mce_disabled; | ||
| 20 | int nr_mce_banks; | ||
| 21 | |||
| 22 | EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | ||
| 23 | |||
| 24 | /* Handle unconfigured int18 (should never happen) */ | ||
| 25 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | ||
| 26 | { | ||
| 27 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); | ||
| 28 | } | ||
| 29 | |||
| 30 | /* Call the installed machine check handler for this CPU setup. */ | ||
| 31 | void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; | ||
| 32 | |||
| 33 | /* This has to be run for each processor */ | ||
| 34 | void mcheck_init(struct cpuinfo_x86 *c) | ||
| 35 | { | ||
| 36 | if (mce_disabled == 1) | ||
| 37 | return; | ||
| 38 | |||
| 39 | switch (c->x86_vendor) { | ||
| 40 | case X86_VENDOR_AMD: | ||
| 41 | amd_mcheck_init(c); | ||
| 42 | break; | ||
| 43 | |||
| 44 | case X86_VENDOR_INTEL: | ||
| 45 | if (c->x86 == 5) | ||
| 46 | intel_p5_mcheck_init(c); | ||
| 47 | if (c->x86 == 6) | ||
| 48 | intel_p6_mcheck_init(c); | ||
| 49 | if (c->x86 == 15) | ||
| 50 | intel_p4_mcheck_init(c); | ||
| 51 | break; | ||
| 52 | |||
| 53 | case X86_VENDOR_CENTAUR: | ||
| 54 | if (c->x86 == 5) | ||
| 55 | winchip_mcheck_init(c); | ||
| 56 | break; | ||
| 57 | |||
| 58 | default: | ||
| 59 | break; | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | static int __init mcheck_disable(char *str) | ||
| 64 | { | ||
| 65 | mce_disabled = 1; | ||
| 66 | return 1; | ||
| 67 | } | ||
| 68 | |||
| 69 | static int __init mcheck_enable(char *str) | ||
| 70 | { | ||
| 71 | mce_disabled = -1; | ||
| 72 | return 1; | ||
| 73 | } | ||
| 74 | |||
| 75 | __setup("nomce", mcheck_disable); | ||
| 76 | __setup("mce", mcheck_enable); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c deleted file mode 100644 index 289cc4815028..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ /dev/null | |||
| @@ -1,1188 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Machine check handler. | ||
| 3 | * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. | ||
| 4 | * Rest from unknown author(s). | ||
| 5 | * 2004 Andi Kleen. Rewrote most of it. | ||
| 6 | * Copyright 2008 Intel Corporation | ||
| 7 | * Author: Andi Kleen | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/init.h> | ||
| 11 | #include <linux/types.h> | ||
| 12 | #include <linux/kernel.h> | ||
| 13 | #include <linux/sched.h> | ||
| 14 | #include <linux/smp_lock.h> | ||
| 15 | #include <linux/string.h> | ||
| 16 | #include <linux/rcupdate.h> | ||
| 17 | #include <linux/kallsyms.h> | ||
| 18 | #include <linux/sysdev.h> | ||
| 19 | #include <linux/miscdevice.h> | ||
| 20 | #include <linux/fs.h> | ||
| 21 | #include <linux/capability.h> | ||
| 22 | #include <linux/cpu.h> | ||
| 23 | #include <linux/percpu.h> | ||
| 24 | #include <linux/poll.h> | ||
| 25 | #include <linux/thread_info.h> | ||
| 26 | #include <linux/ctype.h> | ||
| 27 | #include <linux/kmod.h> | ||
| 28 | #include <linux/kdebug.h> | ||
| 29 | #include <linux/kobject.h> | ||
| 30 | #include <linux/sysfs.h> | ||
| 31 | #include <linux/ratelimit.h> | ||
| 32 | #include <asm/processor.h> | ||
| 33 | #include <asm/msr.h> | ||
| 34 | #include <asm/mce.h> | ||
| 35 | #include <asm/uaccess.h> | ||
| 36 | #include <asm/smp.h> | ||
| 37 | #include <asm/idle.h> | ||
| 38 | |||
| 39 | #define MISC_MCELOG_MINOR 227 | ||
| 40 | |||
| 41 | atomic_t mce_entry; | ||
| 42 | |||
| 43 | static int mce_dont_init; | ||
| 44 | |||
| 45 | /* | ||
| 46 | * Tolerant levels: | ||
| 47 | * 0: always panic on uncorrected errors, log corrected errors | ||
| 48 | * 1: panic or SIGBUS on uncorrected errors, log corrected errors | ||
| 49 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors | ||
| 50 | * 3: never panic or SIGBUS, log all errors (for testing only) | ||
| 51 | */ | ||
| 52 | static int tolerant = 1; | ||
| 53 | static int banks; | ||
| 54 | static u64 *bank; | ||
| 55 | static unsigned long notify_user; | ||
| 56 | static int rip_msr; | ||
| 57 | static int mce_bootlog = -1; | ||
| 58 | static atomic_t mce_events; | ||
| 59 | |||
| 60 | static char trigger[128]; | ||
| 61 | static char *trigger_argv[2] = { trigger, NULL }; | ||
| 62 | |||
| 63 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | ||
| 64 | |||
| 65 | /* MCA banks polled by the period polling timer for corrected events */ | ||
| 66 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | ||
| 67 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | ||
| 68 | }; | ||
| 69 | |||
| 70 | /* Do initial initialization of a struct mce */ | ||
| 71 | void mce_setup(struct mce *m) | ||
| 72 | { | ||
| 73 | memset(m, 0, sizeof(struct mce)); | ||
| 74 | m->cpu = smp_processor_id(); | ||
| 75 | rdtscll(m->tsc); | ||
| 76 | } | ||
| 77 | |||
| 78 | /* | ||
| 79 | * Lockless MCE logging infrastructure. | ||
| 80 | * This avoids deadlocks on printk locks without having to break locks. Also | ||
| 81 | * separate MCEs from kernel messages to avoid bogus bug reports. | ||
| 82 | */ | ||
| 83 | |||
| 84 | static struct mce_log mcelog = { | ||
| 85 | MCE_LOG_SIGNATURE, | ||
| 86 | MCE_LOG_LEN, | ||
| 87 | }; | ||
| 88 | |||
| 89 | void mce_log(struct mce *mce) | ||
| 90 | { | ||
| 91 | unsigned next, entry; | ||
| 92 | atomic_inc(&mce_events); | ||
| 93 | mce->finished = 0; | ||
| 94 | wmb(); | ||
| 95 | for (;;) { | ||
| 96 | entry = rcu_dereference(mcelog.next); | ||
| 97 | for (;;) { | ||
| 98 | /* When the buffer fills up discard new entries. Assume | ||
| 99 | that the earlier errors are the more interesting. */ | ||
| 100 | if (entry >= MCE_LOG_LEN) { | ||
| 101 | set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); | ||
| 102 | return; | ||
| 103 | } | ||
| 104 | /* Old left over entry. Skip. */ | ||
| 105 | if (mcelog.entry[entry].finished) { | ||
| 106 | entry++; | ||
| 107 | continue; | ||
| 108 | } | ||
| 109 | break; | ||
| 110 | } | ||
| 111 | smp_rmb(); | ||
| 112 | next = entry + 1; | ||
| 113 | if (cmpxchg(&mcelog.next, entry, next) == entry) | ||
| 114 | break; | ||
| 115 | } | ||
| 116 | memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); | ||
| 117 | wmb(); | ||
| 118 | mcelog.entry[entry].finished = 1; | ||
| 119 | wmb(); | ||
| 120 | |||
| 121 | set_bit(0, ¬ify_user); | ||
| 122 | } | ||
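
The reservation loop in mce_log() is the core of the lockless design: a writer snapshots mcelog.next, validates the slot, and claims it by advancing next with cmpxchg(); losing the race just means rereading and retrying. The same logic as a self-contained analogue, with a GCC atomic builtin standing in for the kernel's cmpxchg() (the kernel loop additionally skips left-over finished entries):

    /* Returns a claimed slot index, or len when the buffer is full. */
    static unsigned reserve_slot(unsigned *next, unsigned len)
    {
            unsigned entry;

            do {
                    entry = *next;          /* snapshot */
                    if (entry >= len)
                            return len;     /* full: drop the new record */
            } while (!__sync_bool_compare_and_swap(next, entry, entry + 1));

            return entry;                   /* owned exclusively by us */
    }
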
| 123 | |||
| 124 | static void print_mce(struct mce *m) | ||
| 125 | { | ||
| 126 | printk(KERN_EMERG "\n" | ||
| 127 | KERN_EMERG "HARDWARE ERROR\n" | ||
| 128 | KERN_EMERG | ||
| 129 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | ||
| 130 | m->cpu, m->mcgstatus, m->bank, m->status); | ||
| 131 | if (m->ip) { | ||
| 132 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", | ||
| 133 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | ||
| 134 | m->cs, m->ip); | ||
| 135 | if (m->cs == __KERNEL_CS) | ||
| 136 | print_symbol("{%s}", m->ip); | ||
| 137 | printk("\n"); | ||
| 138 | } | ||
| 139 | printk(KERN_EMERG "TSC %llx ", m->tsc); | ||
| 140 | if (m->addr) | ||
| 141 | printk("ADDR %llx ", m->addr); | ||
| 142 | if (m->misc) | ||
| 143 | printk("MISC %llx ", m->misc); | ||
| 144 | printk("\n"); | ||
| 145 | printk(KERN_EMERG "This is not a software problem!\n"); | ||
| 146 | printk(KERN_EMERG "Run through mcelog --ascii to decode " | ||
| 147 | "and contact your hardware vendor\n"); | ||
| 148 | } | ||
| 149 | |||
| 150 | static void mce_panic(char *msg, struct mce *backup, unsigned long start) | ||
| 151 | { | ||
| 152 | int i; | ||
| 153 | |||
| 154 | oops_begin(); | ||
| 155 | for (i = 0; i < MCE_LOG_LEN; i++) { | ||
| 156 | unsigned long tsc = mcelog.entry[i].tsc; | ||
| 157 | |||
| 158 | if (time_before(tsc, start)) | ||
| 159 | continue; | ||
| 160 | print_mce(&mcelog.entry[i]); | ||
| 161 | if (backup && mcelog.entry[i].tsc == backup->tsc) | ||
| 162 | backup = NULL; | ||
| 163 | } | ||
| 164 | if (backup) | ||
| 165 | print_mce(backup); | ||
| 166 | panic(msg); | ||
| 167 | } | ||
| 168 | |||
| 169 | int mce_available(struct cpuinfo_x86 *c) | ||
| 170 | { | ||
| 171 | if (mce_dont_init) | ||
| 172 | return 0; | ||
| 173 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); | ||
| 174 | } | ||
| 175 | |||
| 176 | static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | ||
| 177 | { | ||
| 178 | if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { | ||
| 179 | m->ip = regs->ip; | ||
| 180 | m->cs = regs->cs; | ||
| 181 | } else { | ||
| 182 | m->ip = 0; | ||
| 183 | m->cs = 0; | ||
| 184 | } | ||
| 185 | if (rip_msr) { | ||
| 186 | /* Assume the RIP in the MSR is exact. Is this true? */ | ||
| 187 | m->mcgstatus |= MCG_STATUS_EIPV; | ||
| 188 | rdmsrl(rip_msr, m->ip); | ||
| 189 | m->cs = 0; | ||
| 190 | } | ||
| 191 | } | ||
| 192 | |||
| 193 | /* | ||
| 194 | * Poll for corrected events or events that happened before reset. | ||
| 195 | * Those are just logged through /dev/mcelog. | ||
| 196 | * | ||
| 197 | * This is executed in standard interrupt context. | ||
| 198 | */ | ||
| 199 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | ||
| 200 | { | ||
| 201 | struct mce m; | ||
| 202 | int i; | ||
| 203 | |||
| 204 | mce_setup(&m); | ||
| 205 | |||
| 206 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | ||
| 207 | for (i = 0; i < banks; i++) { | ||
| 208 | if (!bank[i] || !test_bit(i, *b)) | ||
| 209 | continue; | ||
| 210 | |||
| 211 | m.misc = 0; | ||
| 212 | m.addr = 0; | ||
| 213 | m.bank = i; | ||
| 214 | m.tsc = 0; | ||
| 215 | |||
| 216 | barrier(); | ||
| 217 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); | ||
| 218 | if (!(m.status & MCI_STATUS_VAL)) | ||
| 219 | continue; | ||
| 220 | |||
| 221 | /* | ||
| 222 | * Uncorrected events are handled by the exception handler | ||
| 223 | * when it is enabled. But when the exception is disabled log | ||
| 224 | * everything. | ||
| 225 | * | ||
| 226 | * TBD do the same check for MCI_STATUS_EN here? | ||
| 227 | */ | ||
| 228 | if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) | ||
| 229 | continue; | ||
| 230 | |||
| 231 | if (m.status & MCI_STATUS_MISCV) | ||
| 232 | rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); | ||
| 233 | if (m.status & MCI_STATUS_ADDRV) | ||
| 234 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); | ||
| 235 | |||
| 236 | if (!(flags & MCP_TIMESTAMP)) | ||
| 237 | m.tsc = 0; | ||
| 238 | /* | ||
| 239 | * Don't get the IP here because it's unlikely to | ||
| 240 | * have anything to do with the actual error location. | ||
| 241 | */ | ||
| 242 | if (!(flags & MCP_DONTLOG)) { | ||
| 243 | mce_log(&m); | ||
| 244 | add_taint(TAINT_MACHINE_CHECK); | ||
| 245 | } | ||
| 246 | |||
| 247 | /* | ||
| 248 | * Clear state for this bank. | ||
| 249 | */ | ||
| 250 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
| 251 | } | ||
| 252 | |||
| 253 | /* | ||
| 254 | * Don't clear MCG_STATUS here because it's only defined for | ||
| 255 | * exceptions. | ||
| 256 | */ | ||
| 257 | } | ||
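
All of the "+ i*4" arithmetic above works because the architecture lays each bank's four registers out consecutively from MC0: CTL, STATUS, ADDR, MISC. Spelled out as helper macros (not in the original, purely illustrative; the MSR_IA32_MC0_* constants come from the kernel headers):

    #define MCx_CTL(i)      (MSR_IA32_MC0_CTL    + (i) * 4)
    #define MCx_STATUS(i)   (MSR_IA32_MC0_STATUS + (i) * 4)
    #define MCx_ADDR(i)     (MSR_IA32_MC0_ADDR   + (i) * 4)
    #define MCx_MISC(i)     (MSR_IA32_MC0_MISC   + (i) * 4)
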
| 258 | |||
| 259 | /* | ||
| 260 | * The actual machine check handler. This only handles real | ||
| 261 | * exceptions when something got corrupted coming in through int 18. | ||
| 262 | * | ||
| 263 | * This is executed in NMI context not subject to normal locking rules. This | ||
| 264 | * implies that most kernel services cannot be safely used. Don't even | ||
| 265 | * think about putting a printk in there! | ||
| 266 | */ | ||
| 267 | void do_machine_check(struct pt_regs * regs, long error_code) | ||
| 268 | { | ||
| 269 | struct mce m, panicm; | ||
| 270 | u64 mcestart = 0; | ||
| 271 | int i; | ||
| 272 | int panicm_found = 0; | ||
| 273 | /* | ||
| 274 | * If no_way_out gets set, there is no safe way to recover from this | ||
| 275 | * MCE. If tolerant is cranked up, we'll try anyway. | ||
| 276 | */ | ||
| 277 | int no_way_out = 0; | ||
| 278 | /* | ||
| 279 | * If kill_it gets set, there might be a way to recover from this | ||
| 280 | * error. | ||
| 281 | */ | ||
| 282 | int kill_it = 0; | ||
| 283 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | ||
| 284 | |||
| 285 | atomic_inc(&mce_entry); | ||
| 286 | |||
| 287 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | ||
| 288 | 18, SIGKILL) == NOTIFY_STOP) | ||
| 289 | goto out2; | ||
| 290 | if (!banks) | ||
| 291 | goto out2; | ||
| 292 | |||
| 293 | mce_setup(&m); | ||
| 294 | |||
| 295 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | ||
| 296 | /* if the restart IP is not valid, we're done for */ | ||
| 297 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | ||
| 298 | no_way_out = 1; | ||
| 299 | |||
| 300 | rdtscll(mcestart); | ||
| 301 | barrier(); | ||
| 302 | |||
| 303 | for (i = 0; i < banks; i++) { | ||
| 304 | __clear_bit(i, toclear); | ||
| 305 | if (!bank[i]) | ||
| 306 | continue; | ||
| 307 | |||
| 308 | m.misc = 0; | ||
| 309 | m.addr = 0; | ||
| 310 | m.bank = i; | ||
| 311 | |||
| 312 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); | ||
| 313 | if ((m.status & MCI_STATUS_VAL) == 0) | ||
| 314 | continue; | ||
| 315 | |||
| 316 | /* | ||
| 317 | * Non uncorrected errors are handled by machine_check_poll | ||
| 318 | * Leave them alone. | ||
| 319 | */ | ||
| 320 | if ((m.status & MCI_STATUS_UC) == 0) | ||
| 321 | continue; | ||
| 322 | |||
| 323 | /* | ||
| 324 | * Set taint even when machine check was not enabled. | ||
| 325 | */ | ||
| 326 | add_taint(TAINT_MACHINE_CHECK); | ||
| 327 | |||
| 328 | __set_bit(i, toclear); | ||
| 329 | |||
| 330 | if (m.status & MCI_STATUS_EN) { | ||
| 331 | /* if PCC was set, there's no way out */ | ||
| 332 | no_way_out |= !!(m.status & MCI_STATUS_PCC); | ||
| 333 | /* | ||
| 334 | * If this error was uncorrectable and there was | ||
| 335 | * an overflow, we're in trouble. If no overflow, | ||
| 336 | * we might get away with just killing a task. | ||
| 337 | */ | ||
| 338 | if (m.status & MCI_STATUS_UC) { | ||
| 339 | if (tolerant < 1 || m.status & MCI_STATUS_OVER) | ||
| 340 | no_way_out = 1; | ||
| 341 | kill_it = 1; | ||
| 342 | } | ||
| 343 | } else { | ||
| 344 | /* | ||
| 345 | * Machine check event was not enabled. Clear, but | ||
| 346 | * ignore. | ||
| 347 | */ | ||
| 348 | continue; | ||
| 349 | } | ||
| 350 | |||
| 351 | if (m.status & MCI_STATUS_MISCV) | ||
| 352 | rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); | ||
| 353 | if (m.status & MCI_STATUS_ADDRV) | ||
| 354 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); | ||
| 355 | |||
| 356 | mce_get_rip(&m, regs); | ||
| 357 | mce_log(&m); | ||
| 358 | |||
| 359 | /* Did this bank cause the exception? */ | ||
| 360 | /* Assume that the bank with uncorrectable errors did it, | ||
| 361 | and that there is only a single one. */ | ||
| 362 | if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) { | ||
| 363 | panicm = m; | ||
| 364 | panicm_found = 1; | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 368 | /* If we didn't find an uncorrectable error, pick | ||
| 369 | the last one (shouldn't happen, just being safe). */ | ||
| 370 | if (!panicm_found) | ||
| 371 | panicm = m; | ||
| 372 | |||
| 373 | /* | ||
| 374 | * If we have decided that we just CAN'T continue, and the user | ||
| 375 | * has not set tolerant to an insane level, give up and die. | ||
| 376 | */ | ||
| 377 | if (no_way_out && tolerant < 3) | ||
| 378 | mce_panic("Machine check", &panicm, mcestart); | ||
| 379 | |||
| 380 | /* | ||
| 381 | * If the error seems to be unrecoverable, something should be | ||
| 382 | * done. Try to kill as little as possible. If we can kill just | ||
| 383 | * one task, do that. If the user has set the tolerance very | ||
| 384 | * high, don't try to do anything at all. | ||
| 385 | */ | ||
| 386 | if (kill_it && tolerant < 3) { | ||
| 387 | int user_space = 0; | ||
| 388 | |||
| 389 | /* | ||
| 390 | * If the EIPV bit is set, it means the saved IP is the | ||
| 391 | * instruction which caused the MCE. | ||
| 392 | */ | ||
| 393 | if (m.mcgstatus & MCG_STATUS_EIPV) | ||
| 394 | user_space = panicm.ip && (panicm.cs & 3); | ||
| 395 | |||
| 396 | /* | ||
| 397 | * If we know that the error was in user space, send a | ||
| 398 | * SIGBUS. Otherwise, panic if tolerance is low. | ||
| 399 | * | ||
| 400 | * force_sig() takes an awful lot of locks and has a slight | ||
| 401 | * risk of deadlocking. | ||
| 402 | */ | ||
| 403 | if (user_space) { | ||
| 404 | force_sig(SIGBUS, current); | ||
| 405 | } else if (panic_on_oops || tolerant < 2) { | ||
| 406 | mce_panic("Uncorrected machine check", | ||
| 407 | &panicm, mcestart); | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | /* notify userspace ASAP */ | ||
| 412 | set_thread_flag(TIF_MCE_NOTIFY); | ||
| 413 | |||
| 414 | /* the last thing we do is clear state */ | ||
| 415 | for (i = 0; i < banks; i++) { | ||
| 416 | if (test_bit(i, toclear)) | ||
| 417 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
| 418 | } | ||
| 419 | wrmsrl(MSR_IA32_MCG_STATUS, 0); | ||
| 420 | out2: | ||
| 421 | atomic_dec(&mce_entry); | ||
| 422 | } | ||
| 423 | EXPORT_SYMBOL_GPL(do_machine_check); | ||
| 424 | |||
| 425 | #ifdef CONFIG_X86_MCE_INTEL | ||
| 426 | /*** | ||
| 427 | * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog | ||
| 428 | * @cpu: The CPU on which the event occurred. | ||
| 429 | * @status: Event status information | ||
| 430 | * | ||
| 431 | * This function should be called by the thermal interrupt after the | ||
| 432 | * event has been processed and the decision was made to log the event | ||
| 433 | * further. | ||
| 434 | * | ||
| 435 | * The status parameter will be saved to the 'status' field of 'struct mce' | ||
| 436 | * and historically has been the register value of the | ||
| 437 | * MSR_IA32_THERMAL_STATUS (Intel) msr. | ||
| 438 | */ | ||
| 439 | void mce_log_therm_throt_event(__u64 status) | ||
| 440 | { | ||
| 441 | struct mce m; | ||
| 442 | |||
| 443 | mce_setup(&m); | ||
| 444 | m.bank = MCE_THERMAL_BANK; | ||
| 445 | m.status = status; | ||
| 446 | mce_log(&m); | ||
| 447 | } | ||
| 448 | #endif /* CONFIG_X86_MCE_INTEL */ | ||
| 449 | |||
| 450 | /* | ||
| 451 | * Periodic polling timer for "silent" machine check errors. If the | ||
| 452 | * poller finds an MCE, poll 2x faster. When the poller finds no more | ||
| 453 | * errors, poll 2x slower (up to check_interval seconds). | ||
| 454 | */ | ||
| 455 | |||
| 456 | static int check_interval = 5 * 60; /* 5 minutes */ | ||
| 457 | static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ | ||
| 458 | static void mcheck_timer(unsigned long); | ||
| 459 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | ||
| 460 | |||
| 461 | static void mcheck_timer(unsigned long data) | ||
| 462 | { | ||
| 463 | struct timer_list *t = &per_cpu(mce_timer, data); | ||
| 464 | int *n; | ||
| 465 | |||
| 466 | WARN_ON(smp_processor_id() != data); | ||
| 467 | |||
| 468 | if (mce_available(¤t_cpu_data)) | ||
| 469 | machine_check_poll(MCP_TIMESTAMP, | ||
| 470 | &__get_cpu_var(mce_poll_banks)); | ||
| 471 | |||
| 472 | /* | ||
| 473 | * Alert userspace if needed. If we logged an MCE, reduce the | ||
| 474 | * polling interval, otherwise increase the polling interval. | ||
| 475 | */ | ||
| 476 | n = &__get_cpu_var(next_interval); | ||
| 477 | if (mce_notify_user()) { | ||
| 478 | *n = max(*n/2, HZ/100); | ||
| 479 | } else { | ||
| 480 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); | ||
| 481 | } | ||
| 482 | |||
| 483 | t->expires = jiffies + *n; | ||
| 484 | add_timer(t); | ||
| 485 | } | ||
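
The interval policy above is multiplicative in both directions: halve when an event was logged, double when quiet, clamped between HZ/100 (10ms at HZ=1000) and check_interval seconds. Isolated as one function (a sketch; the name is not from this file, and HZ is the kernel's tick constant):

    static int next_poll_interval(int cur, int logged, int max_ivl)
    {
            if (logged)                     /* events seen: poll faster */
                    return cur / 2 > HZ / 100 ? cur / 2 : HZ / 100;
            return cur * 2 < max_ivl ? cur * 2 : max_ivl;   /* quiet: slower */
    }
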
| 486 | |||
| 487 | static void mce_do_trigger(struct work_struct *work) | ||
| 488 | { | ||
| 489 | call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); | ||
| 490 | } | ||
| 491 | |||
| 492 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | ||
| 493 | |||
| 494 | /* | ||
| 495 | * Notify the user(s) about new machine check events. | ||
| 496 | * Can be called from interrupt context, but not from machine check/NMI | ||
| 497 | * context. | ||
| 498 | */ | ||
| 499 | int mce_notify_user(void) | ||
| 500 | { | ||
| 501 | /* Not more than two messages every minute */ | ||
| 502 | static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | ||
| 503 | |||
| 504 | clear_thread_flag(TIF_MCE_NOTIFY); | ||
| 505 | if (test_and_clear_bit(0, ¬ify_user)) { | ||
| 506 | wake_up_interruptible(&mce_wait); | ||
| 507 | |||
| 508 | /* | ||
| 509 | * There is no risk of missing notifications because | ||
| 510 | * work_pending is always cleared before the function is | ||
| 511 | * executed. | ||
| 512 | */ | ||
| 513 | if (trigger[0] && !work_pending(&mce_trigger_work)) | ||
| 514 | schedule_work(&mce_trigger_work); | ||
| 515 | |||
| 516 | if (__ratelimit(&ratelimit)) | ||
| 517 | printk(KERN_INFO "Machine check events logged\n"); | ||
| 518 | |||
| 519 | return 1; | ||
| 520 | } | ||
| 521 | return 0; | ||
| 522 | } | ||
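
The printk is throttled with the generic ratelimit helper: DEFINE_RATELIMIT_STATE(name, interval, burst) allows at most burst events per interval, and __ratelimit() returns nonzero while the budget lasts. The pattern in isolation (the message text is illustrative):

    #include <linux/ratelimit.h>

    static DEFINE_RATELIMIT_STATE(rs, 60 * HZ, 2);  /* at most 2 per minute */

    static void report_event(void)
    {
            if (__ratelimit(&rs))
                    printk(KERN_INFO "event occurred\n");
    }
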
| 523 | |||
| 524 | /* see if the idle task needs to notify userspace */ | ||
| 525 | static int | ||
| 526 | mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk) | ||
| 527 | { | ||
| 528 | /* IDLE_END should be safe - interrupts are back on */ | ||
| 529 | if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) | ||
| 530 | mce_notify_user(); | ||
| 531 | |||
| 532 | return NOTIFY_OK; | ||
| 533 | } | ||
| 534 | |||
| 535 | static struct notifier_block mce_idle_notifier = { | ||
| 536 | .notifier_call = mce_idle_callback, | ||
| 537 | }; | ||
| 538 | |||
| 539 | static __init int periodic_mcheck_init(void) | ||
| 540 | { | ||
| 541 | idle_notifier_register(&mce_idle_notifier); | ||
| 542 | return 0; | ||
| 543 | } | ||
| 544 | __initcall(periodic_mcheck_init); | ||
| 545 | |||
| 546 | /* | ||
| 547 | * Initialize Machine Checks for a CPU. | ||
| 548 | */ | ||
| 549 | static int mce_cap_init(void) | ||
| 550 | { | ||
| 551 | u64 cap; | ||
| 552 | unsigned b; | ||
| 553 | |||
| 554 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
| 555 | b = cap & 0xff; | ||
| 556 | if (b > MAX_NR_BANKS) { | ||
| 557 | printk(KERN_WARNING | ||
| 558 | "MCE: Using only %u machine check banks out of %u\n", | ||
| 559 | MAX_NR_BANKS, b); | ||
| 560 | b = MAX_NR_BANKS; | ||
| 561 | } | ||
| 562 | |||
| 563 | /* Don't support asymmetric configurations today */ | ||
| 564 | WARN_ON(banks != 0 && b != banks); | ||
| 565 | banks = b; | ||
| 566 | if (!bank) { | ||
| 567 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | ||
| 568 | if (!bank) | ||
| 569 | return -ENOMEM; | ||
| 570 | memset(bank, 0xff, banks * sizeof(u64)); | ||
| 571 | } | ||
| 572 | |||
| 573 | /* Use accurate RIP reporting if available. */ | ||
| 574 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) | ||
| 575 | rip_msr = MSR_IA32_MCG_EIP; | ||
| 576 | |||
| 577 | return 0; | ||
| 578 | } | ||
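
mce_cap_init() picks apart MCG_CAP: bits 7:0 give the bank count, and the accurate-RIP test checks MCG_EXT_P (bit 9) together with an extended register count of at least 9 in bits 23:16. As standalone decoders mirroring the checks above (illustrative names, not from this file):

    #include <linux/types.h>

    static unsigned mcg_cap_bank_count(u64 cap)
    {
            return cap & 0xff;              /* bits 7:0 */
    }

    static int mcg_cap_has_accurate_rip(u64 cap)
    {
            /* MCG_EXT_P set and at least 9 extended registers */
            return (cap & (1ULL << 9)) && ((cap >> 16) & 0xff) >= 9;
    }
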
| 579 | |||
| 580 | static void mce_init(void *dummy) | ||
| 581 | { | ||
| 582 | u64 cap; | ||
| 583 | int i; | ||
| 584 | mce_banks_t all_banks; | ||
| 585 | |||
| 586 | /* | ||
| 587 | * Log the machine checks left over from the previous reset. | ||
| 588 | */ | ||
| 589 | bitmap_fill(all_banks, MAX_NR_BANKS); | ||
| 590 | machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); | ||
| 591 | |||
| 592 | set_in_cr4(X86_CR4_MCE); | ||
| 593 | |||
| 594 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
| 595 | if (cap & MCG_CTL_P) | ||
| 596 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
| 597 | |||
| 598 | for (i = 0; i < banks; i++) { | ||
| 599 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | ||
| 600 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
| 601 | } | ||
| 602 | } | ||
| 603 | |||
| 604 | /* Add per CPU specific workarounds here */ | ||
| 605 | static void mce_cpu_quirks(struct cpuinfo_x86 *c) | ||
| 606 | { | ||
| 607 | /* This should be disabled by the BIOS, but isn't always */ | ||
| 608 | if (c->x86_vendor == X86_VENDOR_AMD) { | ||
| 609 | if (c->x86 == 15 && banks > 4) | ||
| 610 | /* disable GART TBL walk error reporting, which trips off | ||
| 611 | incorrectly with the IOMMU & 3ware & Cerberus. */ | ||
| 612 | clear_bit(10, (unsigned long *)&bank[4]); | ||
| 613 | if(c->x86 <= 17 && mce_bootlog < 0) | ||
| 614 | /* Lots of broken BIOS around that don't clear them | ||
| 615 | by default and leave crap in there. Don't log. */ | ||
| 616 | mce_bootlog = 0; | ||
| 617 | } | ||
| 618 | |||
| 619 | } | ||
| 620 | |||
| 621 | static void mce_cpu_features(struct cpuinfo_x86 *c) | ||
| 622 | { | ||
| 623 | switch (c->x86_vendor) { | ||
| 624 | case X86_VENDOR_INTEL: | ||
| 625 | mce_intel_feature_init(c); | ||
| 626 | break; | ||
| 627 | case X86_VENDOR_AMD: | ||
| 628 | mce_amd_feature_init(c); | ||
| 629 | break; | ||
| 630 | default: | ||
| 631 | break; | ||
| 632 | } | ||
| 633 | } | ||
| 634 | |||
| 635 | static void mce_init_timer(void) | ||
| 636 | { | ||
| 637 | struct timer_list *t = &__get_cpu_var(mce_timer); | ||
| 638 | int *n = &__get_cpu_var(next_interval); | ||
| 639 | |||
| 640 | *n = check_interval * HZ; | ||
| 641 | if (!*n) | ||
| 642 | return; | ||
| 643 | setup_timer(t, mcheck_timer, smp_processor_id()); | ||
| 644 | t->expires = round_jiffies(jiffies + *n); | ||
| 645 | add_timer(t); | ||
| 646 | } | ||
| 647 | |||
| 648 | /* | ||
| 649 | * Called for each booted CPU to set up machine checks. | ||
| 650 | * Must be called with preempt off. | ||
| 651 | */ | ||
| 652 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | ||
| 653 | { | ||
| 654 | if (!mce_available(c)) | ||
| 655 | return; | ||
| 656 | |||
| 657 | if (mce_cap_init() < 0) { | ||
| 658 | mce_dont_init = 1; | ||
| 659 | return; | ||
| 660 | } | ||
| 661 | mce_cpu_quirks(c); | ||
| 662 | |||
| 663 | mce_init(NULL); | ||
| 664 | mce_cpu_features(c); | ||
| 665 | mce_init_timer(); | ||
| 666 | } | ||
| 667 | |||
| 668 | /* | ||
| 669 | * Character device to read and clear the MCE log. | ||
| 670 | */ | ||
| 671 | |||
| 672 | static DEFINE_SPINLOCK(mce_state_lock); | ||
| 673 | static int open_count; /* #times opened */ | ||
| 674 | static int open_exclu; /* already open exclusive? */ | ||
| 675 | |||
| 676 | static int mce_open(struct inode *inode, struct file *file) | ||
| 677 | { | ||
| 678 | lock_kernel(); | ||
| 679 | spin_lock(&mce_state_lock); | ||
| 680 | |||
| 681 | if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { | ||
| 682 | spin_unlock(&mce_state_lock); | ||
| 683 | unlock_kernel(); | ||
| 684 | return -EBUSY; | ||
| 685 | } | ||
| 686 | |||
| 687 | if (file->f_flags & O_EXCL) | ||
| 688 | open_exclu = 1; | ||
| 689 | open_count++; | ||
| 690 | |||
| 691 | spin_unlock(&mce_state_lock); | ||
| 692 | unlock_kernel(); | ||
| 693 | |||
| 694 | return nonseekable_open(inode, file); | ||
| 695 | } | ||
| 696 | |||
| 697 | static int mce_release(struct inode *inode, struct file *file) | ||
| 698 | { | ||
| 699 | spin_lock(&mce_state_lock); | ||
| 700 | |||
| 701 | open_count--; | ||
| 702 | open_exclu = 0; | ||
| 703 | |||
| 704 | spin_unlock(&mce_state_lock); | ||
| 705 | |||
| 706 | return 0; | ||
| 707 | } | ||
| 708 | |||
| 709 | static void collect_tscs(void *data) | ||
| 710 | { | ||
| 711 | unsigned long *cpu_tsc = (unsigned long *)data; | ||
| 712 | |||
| 713 | rdtscll(cpu_tsc[smp_processor_id()]); | ||
| 714 | } | ||
| 715 | |||
| 716 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | ||
| 717 | loff_t *off) | ||
| 718 | { | ||
| 719 | unsigned long *cpu_tsc; | ||
| 720 | static DEFINE_MUTEX(mce_read_mutex); | ||
| 721 | unsigned prev, next; | ||
| 722 | char __user *buf = ubuf; | ||
| 723 | int i, err; | ||
| 724 | |||
| 725 | cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); | ||
| 726 | if (!cpu_tsc) | ||
| 727 | return -ENOMEM; | ||
| 728 | |||
| 729 | mutex_lock(&mce_read_mutex); | ||
| 730 | next = rcu_dereference(mcelog.next); | ||
| 731 | |||
| 732 | /* Only supports full reads right now */ | ||
| 733 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | ||
| 734 | mutex_unlock(&mce_read_mutex); | ||
| 735 | kfree(cpu_tsc); | ||
| 736 | return -EINVAL; | ||
| 737 | } | ||
| 738 | |||
| 739 | err = 0; | ||
| 740 | prev = 0; | ||
| 741 | do { | ||
| 742 | for (i = prev; i < next; i++) { | ||
| 743 | unsigned long start = jiffies; | ||
| 744 | |||
| 745 | while (!mcelog.entry[i].finished) { | ||
| 746 | if (time_after_eq(jiffies, start + 2)) { | ||
| 747 | memset(mcelog.entry + i, 0, | ||
| 748 | sizeof(struct mce)); | ||
| 749 | goto timeout; | ||
| 750 | } | ||
| 751 | cpu_relax(); | ||
| 752 | } | ||
| 753 | smp_rmb(); | ||
| 754 | err |= copy_to_user(buf, mcelog.entry + i, | ||
| 755 | sizeof(struct mce)); | ||
| 756 | buf += sizeof(struct mce); | ||
| 757 | timeout: | ||
| 758 | ; | ||
| 759 | } | ||
| 760 | |||
| 761 | memset(mcelog.entry + prev, 0, | ||
| 762 | (next - prev) * sizeof(struct mce)); | ||
| 763 | prev = next; | ||
| 764 | next = cmpxchg(&mcelog.next, prev, 0); | ||
| 765 | } while (next != prev); | ||
| 766 | |||
| 767 | synchronize_sched(); | ||
| 768 | |||
| 769 | /* | ||
| 770 | * Collect entries that were still getting written before the | ||
| 771 | * synchronize. | ||
| 772 | */ | ||
| 773 | on_each_cpu(collect_tscs, cpu_tsc, 1); | ||
| 774 | for (i = next; i < MCE_LOG_LEN; i++) { | ||
| 775 | if (mcelog.entry[i].finished && | ||
| 776 | mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { | ||
| 777 | err |= copy_to_user(buf, mcelog.entry+i, | ||
| 778 | sizeof(struct mce)); | ||
| 779 | smp_rmb(); | ||
| 780 | buf += sizeof(struct mce); | ||
| 781 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | ||
| 782 | } | ||
| 783 | } | ||
| 784 | mutex_unlock(&mce_read_mutex); | ||
| 785 | kfree(cpu_tsc); | ||
| 786 | return err ? -EFAULT : buf - ubuf; | ||
| 787 | } | ||
| 788 | |||
| 789 | static unsigned int mce_poll(struct file *file, poll_table *wait) | ||
| 790 | { | ||
| 791 | poll_wait(file, &mce_wait, wait); | ||
| 792 | if (rcu_dereference(mcelog.next)) | ||
| 793 | return POLLIN | POLLRDNORM; | ||
| 794 | return 0; | ||
| 795 | } | ||
| 796 | |||
| 797 | static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) | ||
| 798 | { | ||
| 799 | int __user *p = (int __user *)arg; | ||
| 800 | |||
| 801 | if (!capable(CAP_SYS_ADMIN)) | ||
| 802 | return -EPERM; | ||
| 803 | switch (cmd) { | ||
| 804 | case MCE_GET_RECORD_LEN: | ||
| 805 | return put_user(sizeof(struct mce), p); | ||
| 806 | case MCE_GET_LOG_LEN: | ||
| 807 | return put_user(MCE_LOG_LEN, p); | ||
| 808 | case MCE_GETCLEAR_FLAGS: { | ||
| 809 | unsigned flags; | ||
| 810 | |||
| 811 | do { | ||
| 812 | flags = mcelog.flags; | ||
| 813 | } while (cmpxchg(&mcelog.flags, flags, 0) != flags); | ||
| 814 | return put_user(flags, p); | ||
| 815 | } | ||
| 816 | default: | ||
| 817 | return -ENOTTY; | ||
| 818 | } | ||
| 819 | } | ||
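
A hypothetical consumer of this interface: open /dev/mcelog (minor 227, registered below) and query the record and log sizes before issuing a full-log read(). The ioctl numbers come from <asm/mce.h>, and CAP_SYS_ADMIN is required:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <asm/mce.h>

    int main(void)
    {
            int reclen, loglen;
            int fd = open("/dev/mcelog", O_RDONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (ioctl(fd, MCE_GET_RECORD_LEN, &reclen) == 0 &&
                ioctl(fd, MCE_GET_LOG_LEN, &loglen) == 0)
                    printf("%d-byte records, %d-entry log\n", reclen, loglen);
            close(fd);
            return 0;
    }
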
| 820 | |||
| 821 | static const struct file_operations mce_chrdev_ops = { | ||
| 822 | .open = mce_open, | ||
| 823 | .release = mce_release, | ||
| 824 | .read = mce_read, | ||
| 825 | .poll = mce_poll, | ||
| 826 | .unlocked_ioctl = mce_ioctl, | ||
| 827 | }; | ||
| 828 | |||
| 829 | static struct miscdevice mce_log_device = { | ||
| 830 | MISC_MCELOG_MINOR, | ||
| 831 | "mcelog", | ||
| 832 | &mce_chrdev_ops, | ||
| 833 | }; | ||
| 834 | |||
| 835 | /* | ||
| 836 | * Old style boot options parsing. Only for compatibility. | ||
| 837 | */ | ||
| 838 | static int __init mcheck_disable(char *str) | ||
| 839 | { | ||
| 840 | mce_dont_init = 1; | ||
| 841 | return 1; | ||
| 842 | } | ||
| 843 | |||
| 844 | /* mce=off disables machine check. | ||
| 845 | mce=TOLERANCELEVEL (number, see above) | ||
| 846 | mce=bootlog Log MCEs from before booting. Disabled by default on AMD. | ||
| 847 | mce=nobootlog Don't log MCEs from before booting. */ | ||
| 848 | static int __init mcheck_enable(char *str) | ||
| 849 | { | ||
| 850 | if (!strcmp(str, "off")) | ||
| 851 | mce_dont_init = 1; | ||
| 852 | else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) | ||
| 853 | mce_bootlog = str[0] == 'b'; | ||
| 854 | else if (isdigit(str[0])) | ||
| 855 | get_option(&str, &tolerant); | ||
| 856 | else | ||
| 857 | printk("mce= argument %s ignored. Please use /sys", str); | ||
| 858 | return 1; | ||
| 859 | } | ||
| 860 | |||
| 861 | __setup("nomce", mcheck_disable); | ||
| 862 | __setup("mce=", mcheck_enable); | ||
| 863 | |||
| 864 | /* | ||
| 865 | * Sysfs support | ||
| 866 | */ | ||
| 867 | |||
| 868 | /* | ||
| 869 | * Disable machine checks on suspend and shutdown. We can't really handle | ||
| 870 | * them later. | ||
| 871 | */ | ||
| 872 | static int mce_disable(void) | ||
| 873 | { | ||
| 874 | int i; | ||
| 875 | |||
| 876 | for (i = 0; i < banks; i++) | ||
| 877 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
| 878 | return 0; | ||
| 879 | } | ||
| 880 | |||
| 881 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | ||
| 882 | { | ||
| 883 | return mce_disable(); | ||
| 884 | } | ||
| 885 | |||
| 886 | static int mce_shutdown(struct sys_device *dev) | ||
| 887 | { | ||
| 888 | return mce_disable(); | ||
| 889 | } | ||
| 890 | |||
| 891 | /* On resume clear all MCE state. Don't want to see leftovers from the BIOS. | ||
| 892 | Only one CPU is active at this time, the others get readded later using | ||
| 893 | CPU hotplug. */ | ||
| 894 | static int mce_resume(struct sys_device *dev) | ||
| 895 | { | ||
| 896 | mce_init(NULL); | ||
| 897 | mce_cpu_features(&current_cpu_data); | ||
| 898 | return 0; | ||
| 899 | } | ||
| 900 | |||
| 901 | static void mce_cpu_restart(void *data) | ||
| 902 | { | ||
| 903 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
| 904 | if (mce_available(&current_cpu_data)) | ||
| 905 | mce_init(NULL); | ||
| 906 | mce_init_timer(); | ||
| 907 | } | ||
| 908 | |||
| 909 | /* Reinit MCEs after user configuration changes */ | ||
| 910 | static void mce_restart(void) | ||
| 911 | { | ||
| 912 | on_each_cpu(mce_cpu_restart, NULL, 1); | ||
| 913 | } | ||
| 914 | |||
| 915 | static struct sysdev_class mce_sysclass = { | ||
| 916 | .suspend = mce_suspend, | ||
| 917 | .shutdown = mce_shutdown, | ||
| 918 | .resume = mce_resume, | ||
| 919 | .name = "machinecheck", | ||
| 920 | }; | ||
| 921 | |||
| 922 | DEFINE_PER_CPU(struct sys_device, device_mce); | ||
| 923 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; | ||
| 924 | |||
| 925 | /* Why are there no generic functions for this? */ | ||
| 926 | #define ACCESSOR(name, var, start) \ | ||
| 927 | static ssize_t show_ ## name(struct sys_device *s, \ | ||
| 928 | struct sysdev_attribute *attr, \ | ||
| 929 | char *buf) { \ | ||
| 930 | return sprintf(buf, "%lx\n", (unsigned long)var); \ | ||
| 931 | } \ | ||
| 932 | static ssize_t set_ ## name(struct sys_device *s, \ | ||
| 933 | struct sysdev_attribute *attr, \ | ||
| 934 | const char *buf, size_t siz) { \ | ||
| 935 | char *end; \ | ||
| 936 | unsigned long new = simple_strtoul(buf, &end, 0); \ | ||
| 937 | if (end == buf) return -EINVAL; \ | ||
| 938 | var = new; \ | ||
| 939 | start; \ | ||
| 940 | return end-buf; \ | ||
| 941 | } \ | ||
| 942 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); | ||
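
For readers tracing the sysfs plumbing: ACCESSOR(check_interval, check_interval, mce_restart()), used a few lines below, expands to roughly the following show/set pair plus attribute. The replacement file above achieves the same with SYSDEV_INT_ATTR and store_int_with_restart(), which is why the macro is gone:

    static ssize_t show_check_interval(struct sys_device *s,
                                       struct sysdev_attribute *attr,
                                       char *buf)
    {
            return sprintf(buf, "%lx\n", (unsigned long)check_interval);
    }

    static ssize_t set_check_interval(struct sys_device *s,
                                      struct sysdev_attribute *attr,
                                      const char *buf, size_t siz)
    {
            char *end;
            unsigned long new = simple_strtoul(buf, &end, 0);

            if (end == buf)
                    return -EINVAL;
            check_interval = new;
            mce_restart();
            return end - buf;
    }

    static SYSDEV_ATTR(check_interval, 0644, show_check_interval,
                       set_check_interval);
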
| 943 | |||
| 944 | static struct sysdev_attribute *bank_attrs; | ||
| 945 | |||
| 946 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | ||
| 947 | char *buf) | ||
| 948 | { | ||
| 949 | u64 b = bank[attr - bank_attrs]; | ||
| 950 | return sprintf(buf, "%llx\n", b); | ||
| 951 | } | ||
| 952 | |||
| 953 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | ||
| 954 | const char *buf, size_t siz) | ||
| 955 | { | ||
| 956 | char *end; | ||
| 957 | u64 new = simple_strtoull(buf, &end, 0); | ||
| 958 | if (end == buf) | ||
| 959 | return -EINVAL; | ||
| 960 | bank[attr - bank_attrs] = new; | ||
| 961 | mce_restart(); | ||
| 962 | return end-buf; | ||
| 963 | } | ||
| 964 | |||
| 965 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, | ||
| 966 | char *buf) | ||
| 967 | { | ||
| 968 | strcpy(buf, trigger); | ||
| 969 | strcat(buf, "\n"); | ||
| 970 | return strlen(trigger) + 1; | ||
| 971 | } | ||
| 972 | |||
| 973 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | ||
| 974 | const char *buf,size_t siz) | ||
| 975 | { | ||
| 976 | char *p; | ||
| 977 | int len; | ||
| 978 | strncpy(trigger, buf, sizeof(trigger)); | ||
| 979 | trigger[sizeof(trigger)-1] = 0; | ||
| 980 | len = strlen(trigger); | ||
| 981 | p = strchr(trigger, '\n'); | ||
| 982 | if (*p) *p = 0; | ||
| 983 | return len; | ||
| 984 | } | ||
| 985 | |||
| 986 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | ||
| 987 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | ||
| 988 | ACCESSOR(check_interval,check_interval,mce_restart()) | ||
| 989 | static struct sysdev_attribute *mce_attributes[] = { | ||
| 990 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, | ||
| 991 | NULL | ||
| 992 | }; | ||
| 993 | |||
| 994 | static cpumask_var_t mce_device_initialized; | ||
| 995 | |||
| 996 | /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ | ||
| 997 | static __cpuinit int mce_create_device(unsigned int cpu) | ||
| 998 | { | ||
| 999 | int err; | ||
| 1000 | int i; | ||
| 1001 | |||
| 1002 | if (!mce_available(&boot_cpu_data)) | ||
| 1003 | return -EIO; | ||
| 1004 | |||
| 1005 | memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); | ||
| 1006 | per_cpu(device_mce,cpu).id = cpu; | ||
| 1007 | per_cpu(device_mce,cpu).cls = &mce_sysclass; | ||
| 1008 | |||
| 1009 | err = sysdev_register(&per_cpu(device_mce,cpu)); | ||
| 1010 | if (err) | ||
| 1011 | return err; | ||
| 1012 | |||
| 1013 | for (i = 0; mce_attributes[i]; i++) { | ||
| 1014 | err = sysdev_create_file(&per_cpu(device_mce,cpu), | ||
| 1015 | mce_attributes[i]); | ||
| 1016 | if (err) | ||
| 1017 | goto error; | ||
| 1018 | } | ||
| 1019 | for (i = 0; i < banks; i++) { | ||
| 1020 | err = sysdev_create_file(&per_cpu(device_mce, cpu), | ||
| 1021 | &bank_attrs[i]); | ||
| 1022 | if (err) | ||
| 1023 | goto error2; | ||
| 1024 | } | ||
| 1025 | cpumask_set_cpu(cpu, mce_device_initialized); | ||
| 1026 | |||
| 1027 | return 0; | ||
| 1028 | error2: | ||
| 1029 | while (--i >= 0) { | ||
| 1030 | sysdev_remove_file(&per_cpu(device_mce, cpu), | ||
| 1031 | &bank_attrs[i]); | ||
| 1032 | } | ||
| 1033 | error: | ||
| 1034 | while (--i >= 0) { | ||
| 1035 | sysdev_remove_file(&per_cpu(device_mce,cpu), | ||
| 1036 | mce_attributes[i]); | ||
| 1037 | } | ||
| 1038 | sysdev_unregister(&per_cpu(device_mce,cpu)); | ||
| 1039 | |||
| 1040 | return err; | ||
| 1041 | } | ||
| 1042 | |||
| 1043 | static __cpuinit void mce_remove_device(unsigned int cpu) | ||
| 1044 | { | ||
| 1045 | int i; | ||
| 1046 | |||
| 1047 | if (!cpumask_test_cpu(cpu, mce_device_initialized)) | ||
| 1048 | return; | ||
| 1049 | |||
| 1050 | for (i = 0; mce_attributes[i]; i++) | ||
| 1051 | sysdev_remove_file(&per_cpu(device_mce,cpu), | ||
| 1052 | mce_attributes[i]); | ||
| 1053 | for (i = 0; i < banks; i++) | ||
| 1054 | sysdev_remove_file(&per_cpu(device_mce, cpu), | ||
| 1055 | &bank_attrs[i]); | ||
| 1056 | sysdev_unregister(&per_cpu(device_mce,cpu)); | ||
| 1057 | cpumask_clear_cpu(cpu, mce_device_initialized); | ||
| 1058 | } | ||
| 1059 | |||
| 1060 | /* Make sure there are no machine checks on offlined CPUs. */ | ||
| 1061 | static void mce_disable_cpu(void *h) | ||
| 1062 | { | ||
| 1063 | int i; | ||
| 1064 | unsigned long action = *(unsigned long *)h; | ||
| 1065 | |||
| 1066 | if (!mce_available(&current_cpu_data)) | ||
| 1067 | return; | ||
| 1068 | if (!(action & CPU_TASKS_FROZEN)) | ||
| 1069 | cmci_clear(); | ||
| 1070 | for (i = 0; i < banks; i++) | ||
| 1071 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | static void mce_reenable_cpu(void *h) | ||
| 1075 | { | ||
| 1076 | int i; | ||
| 1077 | unsigned long action = *(unsigned long *)h; | ||
| 1078 | |||
| 1079 | if (!mce_available(&current_cpu_data)) | ||
| 1080 | return; | ||
| 1081 | if (!(action & CPU_TASKS_FROZEN)) | ||
| 1082 | cmci_reenable(); | ||
| 1083 | for (i = 0; i < banks; i++) | ||
| 1084 | wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); | ||
| 1085 | } | ||
| 1086 | |||
| 1087 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | ||
| 1088 | static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, | ||
| 1089 | unsigned long action, void *hcpu) | ||
| 1090 | { | ||
| 1091 | unsigned int cpu = (unsigned long)hcpu; | ||
| 1092 | struct timer_list *t = &per_cpu(mce_timer, cpu); | ||
| 1093 | |||
| 1094 | switch (action) { | ||
| 1095 | case CPU_ONLINE: | ||
| 1096 | case CPU_ONLINE_FROZEN: | ||
| 1097 | mce_create_device(cpu); | ||
| 1098 | if (threshold_cpu_callback) | ||
| 1099 | threshold_cpu_callback(action, cpu); | ||
| 1100 | break; | ||
| 1101 | case CPU_DEAD: | ||
| 1102 | case CPU_DEAD_FROZEN: | ||
| 1103 | if (threshold_cpu_callback) | ||
| 1104 | threshold_cpu_callback(action, cpu); | ||
| 1105 | mce_remove_device(cpu); | ||
| 1106 | break; | ||
| 1107 | case CPU_DOWN_PREPARE: | ||
| 1108 | case CPU_DOWN_PREPARE_FROZEN: | ||
| 1109 | del_timer_sync(t); | ||
| 1110 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | ||
| 1111 | break; | ||
| 1112 | case CPU_DOWN_FAILED: | ||
| 1113 | case CPU_DOWN_FAILED_FROZEN: | ||
| 1114 | t->expires = round_jiffies(jiffies + | ||
| 1115 | __get_cpu_var(next_interval)); | ||
| 1116 | add_timer_on(t, cpu); | ||
| 1117 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | ||
| 1118 | break; | ||
| 1119 | case CPU_POST_DEAD: | ||
| 1120 | /* intentionally ignoring frozen here */ | ||
| 1121 | cmci_rediscover(cpu); | ||
| 1122 | break; | ||
| 1123 | } | ||
| 1124 | return NOTIFY_OK; | ||
| 1125 | } | ||
| 1126 | |||
| 1127 | static struct notifier_block mce_cpu_notifier __cpuinitdata = { | ||
| 1128 | .notifier_call = mce_cpu_callback, | ||
| 1129 | }; | ||
| 1130 | |||
| 1131 | static __init int mce_init_banks(void) | ||
| 1132 | { | ||
| 1133 | int i; | ||
| 1134 | |||
| 1135 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | ||
| 1136 | GFP_KERNEL); | ||
| 1137 | if (!bank_attrs) | ||
| 1138 | return -ENOMEM; | ||
| 1139 | |||
| 1140 | for (i = 0; i < banks; i++) { | ||
| 1141 | struct sysdev_attribute *a = &bank_attrs[i]; | ||
| 1142 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | ||
| 1143 | if (!a->attr.name) | ||
| 1144 | goto nomem; | ||
| 1145 | a->attr.mode = 0644; | ||
| 1146 | a->show = show_bank; | ||
| 1147 | a->store = set_bank; | ||
| 1148 | } | ||
| 1149 | return 0; | ||
| 1150 | |||
| 1151 | nomem: | ||
| 1152 | while (--i >= 0) | ||
| 1153 | kfree(bank_attrs[i].attr.name); | ||
| 1154 | kfree(bank_attrs); | ||
| 1155 | bank_attrs = NULL; | ||
| 1156 | return -ENOMEM; | ||
| 1157 | } | ||
| 1158 | |||
| 1159 | static __init int mce_init_device(void) | ||
| 1160 | { | ||
| 1161 | int err; | ||
| 1162 | int i = 0; | ||
| 1163 | |||
| 1164 | if (!mce_available(&boot_cpu_data)) | ||
| 1165 | return -EIO; | ||
| 1166 | |||
| 1167 | zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); | ||
| 1168 | |||
| 1169 | err = mce_init_banks(); | ||
| 1170 | if (err) | ||
| 1171 | return err; | ||
| 1172 | |||
| 1173 | err = sysdev_class_register(&mce_sysclass); | ||
| 1174 | if (err) | ||
| 1175 | return err; | ||
| 1176 | |||
| 1177 | for_each_online_cpu(i) { | ||
| 1178 | err = mce_create_device(i); | ||
| 1179 | if (err) | ||
| 1180 | return err; | ||
| 1181 | } | ||
| 1182 | |||
| 1183 | register_hotcpu_notifier(&mce_cpu_notifier); | ||
| 1184 | misc_register(&mce_log_device); | ||
| 1185 | return err; | ||
| 1186 | } | ||
| 1187 | |||
| 1188 | device_initcall(mce_init_device); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 56dde9c4bc96..ddae21620bda 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
| @@ -13,22 +13,22 @@ | |||
| 13 | * | 13 | * |
| 14 | * All MC4_MISCi registers are shared between multi-cores | 14 | * All MC4_MISCi registers are shared between multi-cores |
| 15 | */ | 15 | */ |
| 16 | |||
| 17 | #include <linux/cpu.h> | ||
| 18 | #include <linux/errno.h> | ||
| 19 | #include <linux/init.h> | ||
| 20 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
| 21 | #include <linux/kobject.h> | ||
| 22 | #include <linux/notifier.h> | 17 | #include <linux/notifier.h> |
| 23 | #include <linux/sched.h> | 18 | #include <linux/kobject.h> |
| 24 | #include <linux/smp.h> | 19 | #include <linux/percpu.h> |
| 25 | #include <linux/sysdev.h> | 20 | #include <linux/sysdev.h> |
| 21 | #include <linux/errno.h> | ||
| 22 | #include <linux/sched.h> | ||
| 26 | #include <linux/sysfs.h> | 23 | #include <linux/sysfs.h> |
| 24 | #include <linux/init.h> | ||
| 25 | #include <linux/cpu.h> | ||
| 26 | #include <linux/smp.h> | ||
| 27 | |||
| 27 | #include <asm/apic.h> | 28 | #include <asm/apic.h> |
| 29 | #include <asm/idle.h> | ||
| 28 | #include <asm/mce.h> | 30 | #include <asm/mce.h> |
| 29 | #include <asm/msr.h> | 31 | #include <asm/msr.h> |
| 30 | #include <asm/percpu.h> | ||
| 31 | #include <asm/idle.h> | ||
| 32 | 32 | ||
| 33 | #define PFX "mce_threshold: " | 33 | #define PFX "mce_threshold: " |
| 34 | #define VERSION "version 1.1.1" | 34 | #define VERSION "version 1.1.1" |
| @@ -48,26 +48,26 @@ | |||
| 48 | #define MCG_XBLK_ADDR 0xC0000400 | 48 | #define MCG_XBLK_ADDR 0xC0000400 |
| 49 | 49 | ||
| 50 | struct threshold_block { | 50 | struct threshold_block { |
| 51 | unsigned int block; | 51 | unsigned int block; |
| 52 | unsigned int bank; | 52 | unsigned int bank; |
| 53 | unsigned int cpu; | 53 | unsigned int cpu; |
| 54 | u32 address; | 54 | u32 address; |
| 55 | u16 interrupt_enable; | 55 | u16 interrupt_enable; |
| 56 | u16 threshold_limit; | 56 | u16 threshold_limit; |
| 57 | struct kobject kobj; | 57 | struct kobject kobj; |
| 58 | struct list_head miscj; | 58 | struct list_head miscj; |
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | /* defaults used early on boot */ | 61 | /* defaults used early on boot */ |
| 62 | static struct threshold_block threshold_defaults = { | 62 | static struct threshold_block threshold_defaults = { |
| 63 | .interrupt_enable = 0, | 63 | .interrupt_enable = 0, |
| 64 | .threshold_limit = THRESHOLD_MAX, | 64 | .threshold_limit = THRESHOLD_MAX, |
| 65 | }; | 65 | }; |
| 66 | 66 | ||
| 67 | struct threshold_bank { | 67 | struct threshold_bank { |
| 68 | struct kobject *kobj; | 68 | struct kobject *kobj; |
| 69 | struct threshold_block *blocks; | 69 | struct threshold_block *blocks; |
| 70 | cpumask_var_t cpus; | 70 | cpumask_var_t cpus; |
| 71 | }; | 71 | }; |
| 72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); | 72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); |
| 73 | 73 | ||
| @@ -86,9 +86,9 @@ static void amd_threshold_interrupt(void); | |||
| 86 | */ | 86 | */ |
| 87 | 87 | ||
| 88 | struct thresh_restart { | 88 | struct thresh_restart { |
| 89 | struct threshold_block *b; | 89 | struct threshold_block *b; |
| 90 | int reset; | 90 | int reset; |
| 91 | u16 old_limit; | 91 | u16 old_limit; |
| 92 | }; | 92 | }; |
| 93 | 93 | ||
| 94 | /* must be called with correct cpu affinity */ | 94 | /* must be called with correct cpu affinity */ |
| @@ -110,6 +110,7 @@ static void threshold_restart_bank(void *_tr) | |||
| 110 | } else if (tr->old_limit) { /* change limit w/o reset */ | 110 | } else if (tr->old_limit) { /* change limit w/o reset */ |
| 111 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + | 111 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + |
| 112 | (tr->old_limit - tr->b->threshold_limit); | 112 | (tr->old_limit - tr->b->threshold_limit); |
| 113 | |||
| 113 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | | 114 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | |
| 114 | (new_count & THRESHOLD_MAX); | 115 | (new_count & THRESHOLD_MAX); |
| 115 | } | 116 | } |
| @@ -125,11 +126,11 @@ static void threshold_restart_bank(void *_tr) | |||
| 125 | /* cpu init entry point, called from mce.c with preempt off */ | 126 | /* cpu init entry point, called from mce.c with preempt off */ |
| 126 | void mce_amd_feature_init(struct cpuinfo_x86 *c) | 127 | void mce_amd_feature_init(struct cpuinfo_x86 *c) |
| 127 | { | 128 | { |
| 128 | unsigned int bank, block; | ||
| 129 | unsigned int cpu = smp_processor_id(); | 129 | unsigned int cpu = smp_processor_id(); |
| 130 | u8 lvt_off; | ||
| 131 | u32 low = 0, high = 0, address = 0; | 130 | u32 low = 0, high = 0, address = 0; |
| 131 | unsigned int bank, block; | ||
| 132 | struct thresh_restart tr; | 132 | struct thresh_restart tr; |
| 133 | u8 lvt_off; | ||
| 133 | 134 | ||
| 134 | for (bank = 0; bank < NR_BANKS; ++bank) { | 135 | for (bank = 0; bank < NR_BANKS; ++bank) { |
| 135 | for (block = 0; block < NR_BLOCKS; ++block) { | 136 | for (block = 0; block < NR_BLOCKS; ++block) { |
| @@ -140,8 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
| 140 | if (!address) | 141 | if (!address) |
| 141 | break; | 142 | break; |
| 142 | address += MCG_XBLK_ADDR; | 143 | address += MCG_XBLK_ADDR; |
| 143 | } | 144 | } else |
| 144 | else | ||
| 145 | ++address; | 145 | ++address; |
| 146 | 146 | ||
| 147 | if (rdmsr_safe(address, &low, &high)) | 147 | if (rdmsr_safe(address, &low, &high)) |
| @@ -193,9 +193,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
| 193 | */ | 193 | */ |
| 194 | static void amd_threshold_interrupt(void) | 194 | static void amd_threshold_interrupt(void) |
| 195 | { | 195 | { |
| 196 | u32 low = 0, high = 0, address = 0; | ||
| 196 | unsigned int bank, block; | 197 | unsigned int bank, block; |
| 197 | struct mce m; | 198 | struct mce m; |
| 198 | u32 low = 0, high = 0, address = 0; | ||
| 199 | 199 | ||
| 200 | mce_setup(&m); | 200 | mce_setup(&m); |
| 201 | 201 | ||
| @@ -204,16 +204,16 @@ static void amd_threshold_interrupt(void) | |||
| 204 | if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) | 204 | if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) |
| 205 | continue; | 205 | continue; |
| 206 | for (block = 0; block < NR_BLOCKS; ++block) { | 206 | for (block = 0; block < NR_BLOCKS; ++block) { |
| 207 | if (block == 0) | 207 | if (block == 0) { |
| 208 | address = MSR_IA32_MC0_MISC + bank * 4; | 208 | address = MSR_IA32_MC0_MISC + bank * 4; |
| 209 | else if (block == 1) { | 209 | } else if (block == 1) { |
| 210 | address = (low & MASK_BLKPTR_LO) >> 21; | 210 | address = (low & MASK_BLKPTR_LO) >> 21; |
| 211 | if (!address) | 211 | if (!address) |
| 212 | break; | 212 | break; |
| 213 | address += MCG_XBLK_ADDR; | 213 | address += MCG_XBLK_ADDR; |
| 214 | } | 214 | } else { |
| 215 | else | ||
| 216 | ++address; | 215 | ++address; |
| 216 | } | ||
| 217 | 217 | ||
| 218 | if (rdmsr_safe(address, &low, &high)) | 218 | if (rdmsr_safe(address, &low, &high)) |
| 219 | break; | 219 | break; |
| @@ -229,8 +229,10 @@ static void amd_threshold_interrupt(void) | |||
| 229 | (high & MASK_LOCKED_HI)) | 229 | (high & MASK_LOCKED_HI)) |
| 230 | continue; | 230 | continue; |
| 231 | 231 | ||
| 232 | /* Log the machine check that caused the threshold | 232 | /* |
| 233 | event. */ | 233 | * Log the machine check that caused the threshold |
| 234 | * event. | ||
| 235 | */ | ||
| 234 | machine_check_poll(MCP_TIMESTAMP, | 236 | machine_check_poll(MCP_TIMESTAMP, |
| 235 | &__get_cpu_var(mce_poll_banks)); | 237 | &__get_cpu_var(mce_poll_banks)); |
| 236 | 238 | ||
| @@ -254,48 +256,52 @@ static void amd_threshold_interrupt(void) | |||
| 254 | 256 | ||
| 255 | struct threshold_attr { | 257 | struct threshold_attr { |
| 256 | struct attribute attr; | 258 | struct attribute attr; |
| 257 | ssize_t(*show) (struct threshold_block *, char *); | 259 | ssize_t (*show) (struct threshold_block *, char *); |
| 258 | ssize_t(*store) (struct threshold_block *, const char *, size_t count); | 260 | ssize_t (*store) (struct threshold_block *, const char *, size_t count); |
| 259 | }; | 261 | }; |
| 260 | 262 | ||
| 261 | #define SHOW_FIELDS(name) \ | 263 | #define SHOW_FIELDS(name) \ |
| 262 | static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ | 264 | static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ |
| 263 | { \ | 265 | { \ |
| 264 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ | 266 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ |
| 265 | } | 267 | } |
| 266 | SHOW_FIELDS(interrupt_enable) | 268 | SHOW_FIELDS(interrupt_enable) |
| 267 | SHOW_FIELDS(threshold_limit) | 269 | SHOW_FIELDS(threshold_limit) |
| 268 | 270 | ||
| 269 | static ssize_t store_interrupt_enable(struct threshold_block *b, | 271 | static ssize_t |
| 270 | const char *buf, size_t count) | 272 | store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) |
| 271 | { | 273 | { |
| 272 | char *end; | ||
| 273 | struct thresh_restart tr; | 274 | struct thresh_restart tr; |
| 274 | unsigned long new = simple_strtoul(buf, &end, 0); | 275 | unsigned long new; |
| 275 | if (end == buf) | 276 | |
| 277 | if (strict_strtoul(buf, 0, &new) < 0) | ||
| 276 | return -EINVAL; | 278 | return -EINVAL; |
| 279 | |||
| 277 | b->interrupt_enable = !!new; | 280 | b->interrupt_enable = !!new; |
| 278 | 281 | ||
| 279 | tr.b = b; | 282 | tr.b = b; |
| 280 | tr.reset = 0; | 283 | tr.reset = 0; |
| 281 | tr.old_limit = 0; | 284 | tr.old_limit = 0; |
| 285 | |||
| 282 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 286 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); |
| 283 | 287 | ||
| 284 | return end - buf; | 288 | return size; |
| 285 | } | 289 | } |
| 286 | 290 | ||
| 287 | static ssize_t store_threshold_limit(struct threshold_block *b, | 291 | static ssize_t |
| 288 | const char *buf, size_t count) | 292 | store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) |
| 289 | { | 293 | { |
| 290 | char *end; | ||
| 291 | struct thresh_restart tr; | 294 | struct thresh_restart tr; |
| 292 | unsigned long new = simple_strtoul(buf, &end, 0); | 295 | unsigned long new; |
| 293 | if (end == buf) | 296 | |
| 297 | if (strict_strtoul(buf, 0, &new) < 0) | ||
| 294 | return -EINVAL; | 298 | return -EINVAL; |
| 299 | |||
| 295 | if (new > THRESHOLD_MAX) | 300 | if (new > THRESHOLD_MAX) |
| 296 | new = THRESHOLD_MAX; | 301 | new = THRESHOLD_MAX; |
| 297 | if (new < 1) | 302 | if (new < 1) |
| 298 | new = 1; | 303 | new = 1; |
| 304 | |||
| 299 | tr.old_limit = b->threshold_limit; | 305 | tr.old_limit = b->threshold_limit; |
| 300 | b->threshold_limit = new; | 306 | b->threshold_limit = new; |
| 301 | tr.b = b; | 307 | tr.b = b; |
| @@ -303,12 +309,12 @@ static ssize_t store_threshold_limit(struct threshold_block *b, | |||
| 303 | 309 | ||
| 304 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 310 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); |
| 305 | 311 | ||
| 306 | return end - buf; | 312 | return size; |
| 307 | } | 313 | } |
| 308 | 314 | ||
| 309 | struct threshold_block_cross_cpu { | 315 | struct threshold_block_cross_cpu { |
| 310 | struct threshold_block *tb; | 316 | struct threshold_block *tb; |
| 311 | long retval; | 317 | long retval; |
| 312 | }; | 318 | }; |
| 313 | 319 | ||
| 314 | static void local_error_count_handler(void *_tbcc) | 320 | static void local_error_count_handler(void *_tbcc) |
| @@ -338,16 +344,13 @@ static ssize_t store_error_count(struct threshold_block *b, | |||
| 338 | return 1; | 344 | return 1; |
| 339 | } | 345 | } |
| 340 | 346 | ||
| 341 | #define THRESHOLD_ATTR(_name,_mode,_show,_store) { \ | 347 | #define RW_ATTR(val) \ |
| 342 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | 348 | static struct threshold_attr val = { \ |
| 343 | .show = _show, \ | 349 | .attr = {.name = __stringify(val), .mode = 0644 }, \ |
| 344 | .store = _store, \ | 350 | .show = show_## val, \ |
| 351 | .store = store_## val, \ | ||
| 345 | }; | 352 | }; |
| 346 | 353 | ||
| 347 | #define RW_ATTR(name) \ | ||
| 348 | static struct threshold_attr name = \ | ||
| 349 | THRESHOLD_ATTR(name, 0644, show_## name, store_## name) | ||
| 350 | |||
| 351 | RW_ATTR(interrupt_enable); | 354 | RW_ATTR(interrupt_enable); |
| 352 | RW_ATTR(threshold_limit); | 355 | RW_ATTR(threshold_limit); |
| 353 | RW_ATTR(error_count); | 356 | RW_ATTR(error_count); |
| @@ -359,15 +362,17 @@ static struct attribute *default_attrs[] = { | |||
| 359 | NULL | 362 | NULL |
| 360 | }; | 363 | }; |
| 361 | 364 | ||
| 362 | #define to_block(k) container_of(k, struct threshold_block, kobj) | 365 | #define to_block(k) container_of(k, struct threshold_block, kobj) |
| 363 | #define to_attr(a) container_of(a, struct threshold_attr, attr) | 366 | #define to_attr(a) container_of(a, struct threshold_attr, attr) |
| 364 | 367 | ||
| 365 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) | 368 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) |
| 366 | { | 369 | { |
| 367 | struct threshold_block *b = to_block(kobj); | 370 | struct threshold_block *b = to_block(kobj); |
| 368 | struct threshold_attr *a = to_attr(attr); | 371 | struct threshold_attr *a = to_attr(attr); |
| 369 | ssize_t ret; | 372 | ssize_t ret; |
| 373 | |||
| 370 | ret = a->show ? a->show(b, buf) : -EIO; | 374 | ret = a->show ? a->show(b, buf) : -EIO; |
| 375 | |||
| 371 | return ret; | 376 | return ret; |
| 372 | } | 377 | } |
| 373 | 378 | ||
| @@ -377,18 +382,20 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, | |||
| 377 | struct threshold_block *b = to_block(kobj); | 382 | struct threshold_block *b = to_block(kobj); |
| 378 | struct threshold_attr *a = to_attr(attr); | 383 | struct threshold_attr *a = to_attr(attr); |
| 379 | ssize_t ret; | 384 | ssize_t ret; |
| 385 | |||
| 380 | ret = a->store ? a->store(b, buf, count) : -EIO; | 386 | ret = a->store ? a->store(b, buf, count) : -EIO; |
| 387 | |||
| 381 | return ret; | 388 | return ret; |
| 382 | } | 389 | } |
| 383 | 390 | ||
| 384 | static struct sysfs_ops threshold_ops = { | 391 | static struct sysfs_ops threshold_ops = { |
| 385 | .show = show, | 392 | .show = show, |
| 386 | .store = store, | 393 | .store = store, |
| 387 | }; | 394 | }; |
| 388 | 395 | ||
| 389 | static struct kobj_type threshold_ktype = { | 396 | static struct kobj_type threshold_ktype = { |
| 390 | .sysfs_ops = &threshold_ops, | 397 | .sysfs_ops = &threshold_ops, |
| 391 | .default_attrs = default_attrs, | 398 | .default_attrs = default_attrs, |
| 392 | }; | 399 | }; |
| 393 | 400 | ||
| 394 | static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | 401 | static __cpuinit int allocate_threshold_blocks(unsigned int cpu, |
| @@ -396,9 +403,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
| 396 | unsigned int block, | 403 | unsigned int block, |
| 397 | u32 address) | 404 | u32 address) |
| 398 | { | 405 | { |
| 399 | int err; | ||
| 400 | u32 low, high; | ||
| 401 | struct threshold_block *b = NULL; | 406 | struct threshold_block *b = NULL; |
| 407 | u32 low, high; | ||
| 408 | int err; | ||
| 402 | 409 | ||
| 403 | if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) | 410 | if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) |
| 404 | return 0; | 411 | return 0; |
| @@ -421,20 +428,21 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
| 421 | if (!b) | 428 | if (!b) |
| 422 | return -ENOMEM; | 429 | return -ENOMEM; |
| 423 | 430 | ||
| 424 | b->block = block; | 431 | b->block = block; |
| 425 | b->bank = bank; | 432 | b->bank = bank; |
| 426 | b->cpu = cpu; | 433 | b->cpu = cpu; |
| 427 | b->address = address; | 434 | b->address = address; |
| 428 | b->interrupt_enable = 0; | 435 | b->interrupt_enable = 0; |
| 429 | b->threshold_limit = THRESHOLD_MAX; | 436 | b->threshold_limit = THRESHOLD_MAX; |
| 430 | 437 | ||
| 431 | INIT_LIST_HEAD(&b->miscj); | 438 | INIT_LIST_HEAD(&b->miscj); |
| 432 | 439 | ||
| 433 | if (per_cpu(threshold_banks, cpu)[bank]->blocks) | 440 | if (per_cpu(threshold_banks, cpu)[bank]->blocks) { |
| 434 | list_add(&b->miscj, | 441 | list_add(&b->miscj, |
| 435 | &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); | 442 | &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); |
| 436 | else | 443 | } else { |
| 437 | per_cpu(threshold_banks, cpu)[bank]->blocks = b; | 444 | per_cpu(threshold_banks, cpu)[bank]->blocks = b; |
| 445 | } | ||
| 438 | 446 | ||
| 439 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, | 447 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, |
| 440 | per_cpu(threshold_banks, cpu)[bank]->kobj, | 448 | per_cpu(threshold_banks, cpu)[bank]->kobj, |
| @@ -447,8 +455,9 @@ recurse: | |||
| 447 | if (!address) | 455 | if (!address) |
| 448 | return 0; | 456 | return 0; |
| 449 | address += MCG_XBLK_ADDR; | 457 | address += MCG_XBLK_ADDR; |
| 450 | } else | 458 | } else { |
| 451 | ++address; | 459 | ++address; |
| 460 | } | ||
| 452 | 461 | ||
| 453 | err = allocate_threshold_blocks(cpu, bank, ++block, address); | 462 | err = allocate_threshold_blocks(cpu, bank, ++block, address); |
| 454 | if (err) | 463 | if (err) |
| @@ -500,13 +509,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
| 500 | if (!b) | 509 | if (!b) |
| 501 | goto out; | 510 | goto out; |
| 502 | 511 | ||
| 503 | err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj, | 512 | err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj, |
| 504 | b->kobj, name); | 513 | b->kobj, name); |
| 505 | if (err) | 514 | if (err) |
| 506 | goto out; | 515 | goto out; |
| 507 | 516 | ||
| 508 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); | 517 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); |
| 509 | per_cpu(threshold_banks, cpu)[bank] = b; | 518 | per_cpu(threshold_banks, cpu)[bank] = b; |
| 519 | |||
| 510 | goto out; | 520 | goto out; |
| 511 | } | 521 | } |
| 512 | #endif | 522 | #endif |
| @@ -522,7 +532,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
| 522 | goto out; | 532 | goto out; |
| 523 | } | 533 | } |
| 524 | 534 | ||
| 525 | b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); | 535 | b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj); |
| 526 | if (!b->kobj) | 536 | if (!b->kobj) |
| 527 | goto out_free; | 537 | goto out_free; |
| 528 | 538 | ||
| @@ -542,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
| 542 | if (i == cpu) | 552 | if (i == cpu) |
| 543 | continue; | 553 | continue; |
| 544 | 554 | ||
| 545 | err = sysfs_create_link(&per_cpu(device_mce, i).kobj, | 555 | err = sysfs_create_link(&per_cpu(mce_dev, i).kobj, |
| 546 | b->kobj, name); | 556 | b->kobj, name); |
| 547 | if (err) | 557 | if (err) |
| 548 | goto out; | 558 | goto out; |
| @@ -605,15 +615,13 @@ static void deallocate_threshold_block(unsigned int cpu, | |||
| 605 | 615 | ||
| 606 | static void threshold_remove_bank(unsigned int cpu, int bank) | 616 | static void threshold_remove_bank(unsigned int cpu, int bank) |
| 607 | { | 617 | { |
| 608 | int i = 0; | ||
| 609 | struct threshold_bank *b; | 618 | struct threshold_bank *b; |
| 610 | char name[32]; | 619 | char name[32]; |
| 620 | int i = 0; | ||
| 611 | 621 | ||
| 612 | b = per_cpu(threshold_banks, cpu)[bank]; | 622 | b = per_cpu(threshold_banks, cpu)[bank]; |
| 613 | |||
| 614 | if (!b) | 623 | if (!b) |
| 615 | return; | 624 | return; |
| 616 | |||
| 617 | if (!b->blocks) | 625 | if (!b->blocks) |
| 618 | goto free_out; | 626 | goto free_out; |
| 619 | 627 | ||
| @@ -622,8 +630,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
| 622 | #ifdef CONFIG_SMP | 630 | #ifdef CONFIG_SMP |
| 623 | /* sibling symlink */ | 631 | /* sibling symlink */ |
| 624 | if (shared_bank[bank] && b->blocks->cpu != cpu) { | 632 | if (shared_bank[bank] && b->blocks->cpu != cpu) { |
| 625 | sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); | 633 | sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name); |
| 626 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 634 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
| 635 | |||
| 627 | return; | 636 | return; |
| 628 | } | 637 | } |
| 629 | #endif | 638 | #endif |
| @@ -633,7 +642,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
| 633 | if (i == cpu) | 642 | if (i == cpu) |
| 634 | continue; | 643 | continue; |
| 635 | 644 | ||
| 636 | sysfs_remove_link(&per_cpu(device_mce, i).kobj, name); | 645 | sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name); |
| 637 | per_cpu(threshold_banks, i)[bank] = NULL; | 646 | per_cpu(threshold_banks, i)[bank] = NULL; |
| 638 | } | 647 | } |
| 639 | 648 | ||
| @@ -659,12 +668,9 @@ static void threshold_remove_device(unsigned int cpu) | |||
| 659 | } | 668 | } |
| 660 | 669 | ||
| 661 | /* get notified when a cpu comes on/off */ | 670 | /* get notified when a cpu comes on/off */ |
| 662 | static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, | 671 | static void __cpuinit |
| 663 | unsigned int cpu) | 672 | amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu) |
| 664 | { | 673 | { |
| 665 | if (cpu >= NR_CPUS) | ||
| 666 | return; | ||
| 667 | |||
| 668 | switch (action) { | 674 | switch (action) { |
| 669 | case CPU_ONLINE: | 675 | case CPU_ONLINE: |
| 670 | case CPU_ONLINE_FROZEN: | 676 | case CPU_ONLINE_FROZEN: |
| @@ -686,11 +692,12 @@ static __init int threshold_init_device(void) | |||
| 686 | /* to hit CPUs online before the notifier is up */ | 692 | /* to hit CPUs online before the notifier is up */ |
| 687 | for_each_online_cpu(lcpu) { | 693 | for_each_online_cpu(lcpu) { |
| 688 | int err = threshold_create_device(lcpu); | 694 | int err = threshold_create_device(lcpu); |
| 695 | |||
| 689 | if (err) | 696 | if (err) |
| 690 | return err; | 697 | return err; |
| 691 | } | 698 | } |
| 692 | threshold_cpu_callback = amd_64_threshold_cpu_callback; | 699 | threshold_cpu_callback = amd_64_threshold_cpu_callback; |
| 700 | |||
| 693 | return 0; | 701 | return 0; |
| 694 | } | 702 | } |
| 695 | |||
| 696 | device_initcall(threshold_init_device); | 703 | device_initcall(threshold_init_device); |
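The subtlest arithmetic in mce_amd_64.c is the "change limit w/o reset" branch of threshold_restart_bank(). The hardware count field trips when it reaches THRESHOLD_MAX, so it is seeded with THRESHOLD_MAX - limit, and a limit change that must not discard already-counted errors adds (old_limit - new_limit) to the field. A standalone check of that identity, as a simplified model that ignores the interrupt-enable and register-masking details:

#include <stdio.h>

#define THRESHOLD_MAX 0xFFF	/* 12-bit count field, trips at 0xFFF */

/* errors seen so far, given the raw count field and the current limit */
static int errors_seen(int field, int limit)
{
	return field - (THRESHOLD_MAX - limit);
}

int main(void)
{
	int limit = 10, new_limit = 20, field;

	field = THRESHOLD_MAX - limit;	/* hardware seeding convention */
	field += 7;			/* seven errors are counted */
	printf("seen = %d\n", errors_seen(field, limit));

	/*
	 * Change the limit without a reset, exactly as the kernel does:
	 * new_count = (field & THRESHOLD_MAX) + (old_limit - new_limit)
	 */
	field = (field & THRESHOLD_MAX) + (limit - new_limit);
	limit = new_limit;
	printf("seen = %d (unchanged)\n", errors_seen(field, limit));
	return 0;
}

Both lines print 7: the number of errors already accumulated survives the limit change, which is the whole point of the tr->old_limit bookkeeping above.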
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c new file mode 100644 index 000000000000..2b011d2d8579 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | /* | ||
| 2 | * Common code for Intel machine checks | ||
| 3 | */ | ||
| 4 | #include <linux/interrupt.h> | ||
| 5 | #include <linux/kernel.h> | ||
| 6 | #include <linux/types.h> | ||
| 7 | #include <linux/init.h> | ||
| 8 | #include <linux/smp.h> | ||
| 9 | |||
| 10 | #include <asm/therm_throt.h> | ||
| 11 | #include <asm/processor.h> | ||
| 12 | #include <asm/system.h> | ||
| 13 | #include <asm/apic.h> | ||
| 14 | #include <asm/msr.h> | ||
| 15 | |||
| 16 | #include "mce.h" | ||
| 17 | |||
| 18 | void intel_init_thermal(struct cpuinfo_x86 *c) | ||
| 19 | { | ||
| 20 | unsigned int cpu = smp_processor_id(); | ||
| 21 | int tm2 = 0; | ||
| 22 | u32 l, h; | ||
| 23 | |||
| 24 | /* Thermal monitoring depends on ACPI and clock modulation */ | ||
| 25 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | ||
| 26 | return; | ||
| 27 | |||
| 28 | /* | ||
| 29 | * First check if it's enabled already, in which case there might | ||
| 30 | * be some SMM goo which handles it, so we can't even put a handler | ||
| 31 | * since it might be delivered via SMI already: | ||
| 32 | */ | ||
| 33 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
| 34 | h = apic_read(APIC_LVTTHMR); | ||
| 35 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | ||
| 36 | printk(KERN_DEBUG | ||
| 37 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | ||
| 38 | return; | ||
| 39 | } | ||
| 40 | |||
| 41 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | ||
| 42 | tm2 = 1; | ||
| 43 | |||
| 44 | /* Check whether a vector already exists */ | ||
| 45 | if (h & APIC_VECTOR_MASK) { | ||
| 46 | printk(KERN_DEBUG | ||
| 47 | "CPU%d: Thermal LVT vector (%#x) already installed\n", | ||
| 48 | cpu, (h & APIC_VECTOR_MASK)); | ||
| 49 | return; | ||
| 50 | } | ||
| 51 | |||
| 52 | /* We'll mask the thermal vector in the lapic till we're ready: */ | ||
| 53 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | ||
| 54 | apic_write(APIC_LVTTHMR, h); | ||
| 55 | |||
| 56 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | ||
| 57 | wrmsr(MSR_IA32_THERM_INTERRUPT, | ||
| 58 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | ||
| 59 | |||
| 60 | intel_set_thermal_handler(); | ||
| 61 | |||
| 62 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
| 63 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | ||
| 64 | |||
| 65 | /* Unmask the thermal vector: */ | ||
| 66 | l = apic_read(APIC_LVTTHMR); | ||
| 67 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
| 68 | |||
| 69 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | ||
| 70 | cpu, tm2 ? "TM2" : "TM1"); | ||
| 71 | |||
| 72 | /* enable thermal throttle processing */ | ||
| 73 | atomic_set(&therm_throt_en, 1); | ||
| 74 | } | ||
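The new mce_intel.c consolidates thermal init that previously existed in two copies (p4.c for 32-bit, mce_intel_64.c for 64-bit). Before taking over the thermal LVT it bails out in two cases: firmware already routes thermal events to SMM, or some vector is already installed. A compilable sketch of just those early-exit checks, with rdmsr()/apic_read() replaced by canned values; the bit positions are meant to match the kernel's definitions but should be treated as assumptions here:

#include <stdio.h>
#include <stdint.h>

#define MISC_ENABLE_TM1   (1u << 3)	/* IA32_MISC_ENABLE.TM1, assumed */
#define APIC_DM_SMI       (0x2u << 8)	/* LVT delivery mode = SMI */
#define APIC_VECTOR_MASK  0xFFu

/* canned "register" values; real code reads MSRs and the local APIC */
static uint32_t misc_enable = MISC_ENABLE_TM1;
static uint32_t lvtthmr = APIC_DM_SMI;

int main(void)
{
	/* check 1: BIOS/SMM already owns thermal events -- hands off */
	if ((misc_enable & MISC_ENABLE_TM1) && (lvtthmr & APIC_DM_SMI)) {
		printf("thermal monitoring handled by SMI\n");
		return 0;
	}
	/* check 2: another handler's vector is already installed */
	if (lvtthmr & APIC_VECTOR_MASK) {
		printf("LVT vector %#x already installed\n",
		       lvtthmr & APIC_VECTOR_MASK);
		return 0;
	}
	printf("safe to take over: program LVT masked, enable, unmask\n");
	return 0;
}

The unmask-last ordering in intel_init_thermal() matters for the same reason as the checks: the vector must be fully set up before the first interrupt can be delivered.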
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index 65a0fceedcd7..f2ef6952c400 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c | |||
| @@ -16,6 +16,8 @@ | |||
| 16 | #include <asm/idle.h> | 16 | #include <asm/idle.h> |
| 17 | #include <asm/therm_throt.h> | 17 | #include <asm/therm_throt.h> |
| 18 | 18 | ||
| 19 | #include "mce.h" | ||
| 20 | |||
| 19 | asmlinkage void smp_thermal_interrupt(void) | 21 | asmlinkage void smp_thermal_interrupt(void) |
| 20 | { | 22 | { |
| 21 | __u64 msr_val; | 23 | __u64 msr_val; |
| @@ -26,67 +28,13 @@ asmlinkage void smp_thermal_interrupt(void) | |||
| 26 | irq_enter(); | 28 | irq_enter(); |
| 27 | 29 | ||
| 28 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 30 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
| 29 | if (therm_throt_process(msr_val & 1)) | 31 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) |
| 30 | mce_log_therm_throt_event(msr_val); | 32 | mce_log_therm_throt_event(msr_val); |
| 31 | 33 | ||
| 32 | inc_irq_stat(irq_thermal_count); | 34 | inc_irq_stat(irq_thermal_count); |
| 33 | irq_exit(); | 35 | irq_exit(); |
| 34 | } | 36 | } |
| 35 | 37 | ||
| 36 | static void intel_init_thermal(struct cpuinfo_x86 *c) | ||
| 37 | { | ||
| 38 | u32 l, h; | ||
| 39 | int tm2 = 0; | ||
| 40 | unsigned int cpu = smp_processor_id(); | ||
| 41 | |||
| 42 | if (!cpu_has(c, X86_FEATURE_ACPI)) | ||
| 43 | return; | ||
| 44 | |||
| 45 | if (!cpu_has(c, X86_FEATURE_ACC)) | ||
| 46 | return; | ||
| 47 | |||
| 48 | /* first check if TM1 is already enabled by the BIOS, in which | ||
| 49 | * case there might be some SMM goo which handles it, so we can't even | ||
| 50 | * put a handler since it might be delivered via SMI already. | ||
| 51 | */ | ||
| 52 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
| 53 | h = apic_read(APIC_LVTTHMR); | ||
| 54 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | ||
| 55 | printk(KERN_DEBUG | ||
| 56 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | ||
| 57 | return; | ||
| 58 | } | ||
| 59 | |||
| 60 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | ||
| 61 | tm2 = 1; | ||
| 62 | |||
| 63 | if (h & APIC_VECTOR_MASK) { | ||
| 64 | printk(KERN_DEBUG | ||
| 65 | "CPU%d: Thermal LVT vector (%#x) already " | ||
| 66 | "installed\n", cpu, (h & APIC_VECTOR_MASK)); | ||
| 67 | return; | ||
| 68 | } | ||
| 69 | |||
| 70 | h = THERMAL_APIC_VECTOR; | ||
| 71 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); | ||
| 72 | apic_write(APIC_LVTTHMR, h); | ||
| 73 | |||
| 74 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | ||
| 75 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); | ||
| 76 | |||
| 77 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
| 78 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | ||
| 79 | |||
| 80 | l = apic_read(APIC_LVTTHMR); | ||
| 81 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
| 82 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | ||
| 83 | cpu, tm2 ? "TM2" : "TM1"); | ||
| 84 | |||
| 85 | /* enable thermal throttle processing */ | ||
| 86 | atomic_set(&therm_throt_en, 1); | ||
| 87 | return; | ||
| 88 | } | ||
| 89 | |||
| 90 | /* | 38 | /* |
| 91 | * Support for Intel Corrected Machine Check Interrupts. This allows | 39 | * Support for Intel Corrected Machine Check Interrupts. This allows |
| 92 | * the CPU to raise an interrupt when a corrected machine check happened. | 40 | * the CPU to raise an interrupt when a corrected machine check happened. |
| @@ -108,6 +56,9 @@ static int cmci_supported(int *banks) | |||
| 108 | { | 56 | { |
| 109 | u64 cap; | 57 | u64 cap; |
| 110 | 58 | ||
| 59 | if (mce_cmci_disabled || mce_ignore_ce) | ||
| 60 | return 0; | ||
| 61 | |||
| 111 | /* | 62 | /* |
| 112 | * Vendor check is not strictly needed, but the initial | 63 | * Vendor check is not strictly needed, but the initial |
| 113 | * initialization is vendor keyed and this | 64 | * initialization is vendor keyed and this |
| @@ -131,7 +82,7 @@ static int cmci_supported(int *banks) | |||
| 131 | static void intel_threshold_interrupt(void) | 82 | static void intel_threshold_interrupt(void) |
| 132 | { | 83 | { |
| 133 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | 84 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); |
| 134 | mce_notify_user(); | 85 | mce_notify_irq(); |
| 135 | } | 86 | } |
| 136 | 87 | ||
| 137 | static void print_update(char *type, int *hdr, int num) | 88 | static void print_update(char *type, int *hdr, int num) |
| @@ -247,7 +198,7 @@ void cmci_rediscover(int dying) | |||
| 247 | return; | 198 | return; |
| 248 | cpumask_copy(old, ¤t->cpus_allowed); | 199 | cpumask_copy(old, ¤t->cpus_allowed); |
| 249 | 200 | ||
| 250 | for_each_online_cpu (cpu) { | 201 | for_each_online_cpu(cpu) { |
| 251 | if (cpu == dying) | 202 | if (cpu == dying) |
| 252 | continue; | 203 | continue; |
| 253 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) | 204 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) |
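Two small things to note in the mce_intel_64.c hunks: cmci_supported() now honors the new mce_cmci_disabled / mce_ignore_ce switches before touching MCG_CAP, and the thermal interrupt stops testing a bare bit 0 in favor of THERM_STATUS_PROCHOT. The latter is purely a naming fix; a minimal userspace decode of the same test (a model, not kernel code):

#include <stdio.h>
#include <stdint.h>

#define THERM_STATUS_PROCHOT (1ull << 0)  /* IA32_THERM_STATUS bit 0 */

/* mirrors: therm_throt_process(msr_val & THERM_STATUS_PROCHOT) */
static void thermal_event(uint64_t therm_status)
{
	if (therm_status & THERM_STATUS_PROCHOT)
		printf("CPU asserting PROCHOT# (throttling)\n");
	else
		printf("throttling condition cleared\n");
}

int main(void)
{
	thermal_event(0x1);	/* entering throttle */
	thermal_event(0x0);	/* leaving throttle */
	return 0;
}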
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index a74af128efc9..70b710420f74 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c | |||
| @@ -6,15 +6,14 @@ | |||
| 6 | * This file contains routines to check for non-fatal MCEs every 15s | 6 | * This file contains routines to check for non-fatal MCEs every 15s |
| 7 | * | 7 | * |
| 8 | */ | 8 | */ |
| 9 | |||
| 10 | #include <linux/init.h> | ||
| 11 | #include <linux/types.h> | ||
| 12 | #include <linux/kernel.h> | ||
| 13 | #include <linux/jiffies.h> | ||
| 14 | #include <linux/workqueue.h> | ||
| 15 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
| 16 | #include <linux/smp.h> | 10 | #include <linux/workqueue.h> |
| 11 | #include <linux/jiffies.h> | ||
| 12 | #include <linux/kernel.h> | ||
| 17 | #include <linux/module.h> | 13 | #include <linux/module.h> |
| 14 | #include <linux/types.h> | ||
| 15 | #include <linux/init.h> | ||
| 16 | #include <linux/smp.h> | ||
| 18 | 17 | ||
| 19 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
| 20 | #include <asm/system.h> | 19 | #include <asm/system.h> |
| @@ -22,9 +21,9 @@ | |||
| 22 | 21 | ||
| 23 | #include "mce.h" | 22 | #include "mce.h" |
| 24 | 23 | ||
| 25 | static int firstbank; | 24 | static int firstbank; |
| 26 | 25 | ||
| 27 | #define MCE_RATE 15*HZ /* timer rate is 15s */ | 26 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ |
| 28 | 27 | ||
| 29 | static void mce_checkregs(void *info) | 28 | static void mce_checkregs(void *info) |
| 30 | { | 29 | { |
| @@ -34,23 +33,24 @@ static void mce_checkregs(void *info) | |||
| 34 | for (i = firstbank; i < nr_mce_banks; i++) { | 33 | for (i = firstbank; i < nr_mce_banks; i++) { |
| 35 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | 34 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); |
| 36 | 35 | ||
| 37 | if (high & (1<<31)) { | 36 | if (!(high & (1<<31))) |
| 38 | printk(KERN_INFO "MCE: The hardware reports a non " | 37 | continue; |
| 39 | "fatal, correctable incident occurred on " | 38 | |
| 40 | "CPU %d.\n", | 39 | printk(KERN_INFO "MCE: The hardware reports a non fatal, " |
| 40 | "correctable incident occurred on CPU %d.\n", | ||
| 41 | smp_processor_id()); | 41 | smp_processor_id()); |
| 42 | printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); | 42 | |
| 43 | 43 | printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); | |
| 44 | /* | 44 | |
| 45 | * Scrub the error so we don't pick it up in MCE_RATE | 45 | /* |
| 46 | * seconds time. | 46 | * Scrub the error so we don't pick it up in MCE_RATE |
| 47 | */ | 47 | * seconds time: |
| 48 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | 48 | */ |
| 49 | 49 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | |
| 50 | /* Serialize */ | 50 | |
| 51 | wmb(); | 51 | /* Serialize: */ |
| 52 | add_taint(TAINT_MACHINE_CHECK); | 52 | wmb(); |
| 53 | } | 53 | add_taint(TAINT_MACHINE_CHECK); |
| 54 | } | 54 | } |
| 55 | } | 55 | } |
| 56 | 56 | ||
| @@ -77,16 +77,17 @@ static int __init init_nonfatal_mce_checker(void) | |||
| 77 | 77 | ||
| 78 | /* Some Athlons misbehave when we frob bank 0 */ | 78 | /* Some Athlons misbehave when we frob bank 0 */ |
| 79 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | 79 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && |
| 80 | boot_cpu_data.x86 == 6) | 80 | boot_cpu_data.x86 == 6) |
| 81 | firstbank = 1; | 81 | firstbank = 1; |
| 82 | else | 82 | else |
| 83 | firstbank = 0; | 83 | firstbank = 0; |
| 84 | 84 | ||
| 85 | /* | 85 | /* |
| 86 | * Check for non-fatal errors every MCE_RATE s | 86 | * Check for non-fatal errors every MCE_RATE s |
| 87 | */ | 87 | */ |
| 88 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); | 88 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); |
| 89 | printk(KERN_INFO "Machine check exception polling timer started.\n"); | 89 | printk(KERN_INFO "Machine check exception polling timer started.\n"); |
| 90 | |||
| 90 | return 0; | 91 | return 0; |
| 91 | } | 92 | } |
| 92 | module_init(init_nonfatal_mce_checker); | 93 | module_init(init_nonfatal_mce_checker); |
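non-fatal.c never takes an exception; it polls. Every MCE_RATE (15 s) it reads each bank's status register, and any bank whose valid bit is set (bit 63 of MCi_STATUS, i.e. bit 31 of the high word) gets logged and scrubbed so the next pass stays quiet. A simplified userspace model of one polling pass, with arrays standing in for the rdmsr/wrmsr pairs:

#include <stdio.h>

#define NR_BANKS 5
#define STATUS_VALID_HI (1u << 31)	/* bit 63 of MCi_STATUS */

/* fake bank state; bank 1 has a pending corrected error */
static unsigned int status_hi[NR_BANKS] = { 0, STATUS_VALID_HI | 5, 0, 0, 0 };
static unsigned int status_lo[NR_BANKS] = { 0, 0xdeadbeef, 0, 0, 0 };

/* one pass of the mce_checkregs() loop */
static void poll_banks(int firstbank)
{
	int i;

	for (i = firstbank; i < NR_BANKS; i++) {
		if (!(status_hi[i] & STATUS_VALID_HI))
			continue;

		printf("bank %d: %08x%08x (corrected)\n",
		       i, status_hi[i], status_lo[i]);

		/* scrub so we don't re-log the same event in 15s */
		status_hi[i] = status_lo[i] = 0;
	}
}

int main(void)
{
	poll_banks(1);	/* firstbank = 1 skips bank 0 on affected Athlons */
	poll_banks(1);	/* second pass: nothing left to report */
	return 0;
}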
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index f53bdcbaf382..82cee108a2d3 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
| @@ -2,18 +2,17 @@ | |||
| 2 | * P4 specific Machine Check Exception Reporting | 2 | * P4 specific Machine Check Exception Reporting |
| 3 | */ | 3 | */ |
| 4 | 4 | ||
| 5 | #include <linux/init.h> | ||
| 6 | #include <linux/types.h> | ||
| 7 | #include <linux/kernel.h> | ||
| 8 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
| 6 | #include <linux/kernel.h> | ||
| 7 | #include <linux/types.h> | ||
| 8 | #include <linux/init.h> | ||
| 9 | #include <linux/smp.h> | 9 | #include <linux/smp.h> |
| 10 | 10 | ||
| 11 | #include <asm/therm_throt.h> | ||
| 11 | #include <asm/processor.h> | 12 | #include <asm/processor.h> |
| 12 | #include <asm/system.h> | 13 | #include <asm/system.h> |
| 13 | #include <asm/msr.h> | ||
| 14 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
| 15 | 15 | #include <asm/msr.h> | |
| 16 | #include <asm/therm_throt.h> | ||
| 17 | 16 | ||
| 18 | #include "mce.h" | 17 | #include "mce.h" |
| 19 | 18 | ||
| @@ -36,6 +35,7 @@ static int mce_num_extended_msrs; | |||
| 36 | 35 | ||
| 37 | 36 | ||
| 38 | #ifdef CONFIG_X86_MCE_P4THERMAL | 37 | #ifdef CONFIG_X86_MCE_P4THERMAL |
| 38 | |||
| 39 | static void unexpected_thermal_interrupt(struct pt_regs *regs) | 39 | static void unexpected_thermal_interrupt(struct pt_regs *regs) |
| 40 | { | 40 | { |
| 41 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | 41 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", |
| @@ -43,7 +43,7 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs) | |||
| 43 | add_taint(TAINT_MACHINE_CHECK); | 43 | add_taint(TAINT_MACHINE_CHECK); |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | /* P4/Xeon Thermal transition interrupt handler */ | 46 | /* P4/Xeon Thermal transition interrupt handler: */ |
| 47 | static void intel_thermal_interrupt(struct pt_regs *regs) | 47 | static void intel_thermal_interrupt(struct pt_regs *regs) |
| 48 | { | 48 | { |
| 49 | __u64 msr_val; | 49 | __u64 msr_val; |
| @@ -51,11 +51,12 @@ static void intel_thermal_interrupt(struct pt_regs *regs) | |||
| 51 | ack_APIC_irq(); | 51 | ack_APIC_irq(); |
| 52 | 52 | ||
| 53 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 53 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
| 54 | therm_throt_process(msr_val & 0x1); | 54 | therm_throt_process(msr_val & THERM_STATUS_PROCHOT); |
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | /* Thermal interrupt handler for this CPU setup */ | 57 | /* Thermal interrupt handler for this CPU setup: */ |
| 58 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; | 58 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = |
| 59 | unexpected_thermal_interrupt; | ||
| 59 | 60 | ||
| 60 | void smp_thermal_interrupt(struct pt_regs *regs) | 61 | void smp_thermal_interrupt(struct pt_regs *regs) |
| 61 | { | 62 | { |
| @@ -65,67 +66,15 @@ void smp_thermal_interrupt(struct pt_regs *regs) | |||
| 65 | irq_exit(); | 66 | irq_exit(); |
| 66 | } | 67 | } |
| 67 | 68 | ||
| 68 | /* P4/Xeon Thermal regulation detect and init */ | 69 | void intel_set_thermal_handler(void) |
| 69 | static void intel_init_thermal(struct cpuinfo_x86 *c) | ||
| 70 | { | 70 | { |
| 71 | u32 l, h; | ||
| 72 | unsigned int cpu = smp_processor_id(); | ||
| 73 | |||
| 74 | /* Thermal monitoring */ | ||
| 75 | if (!cpu_has(c, X86_FEATURE_ACPI)) | ||
| 76 | return; /* -ENODEV */ | ||
| 77 | |||
| 78 | /* Clock modulation */ | ||
| 79 | if (!cpu_has(c, X86_FEATURE_ACC)) | ||
| 80 | return; /* -ENODEV */ | ||
| 81 | |||
| 82 | /* first check if its enabled already, in which case there might | ||
| 83 | * be some SMM goo which handles it, so we can't even put a handler | ||
| 84 | * since it might be delivered via SMI already -zwanem. | ||
| 85 | */ | ||
| 86 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
| 87 | h = apic_read(APIC_LVTTHMR); | ||
| 88 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | ||
| 89 | printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", | ||
| 90 | cpu); | ||
| 91 | return; /* -EBUSY */ | ||
| 92 | } | ||
| 93 | |||
| 94 | /* check whether a vector already exists, temporarily masked? */ | ||
| 95 | if (h & APIC_VECTOR_MASK) { | ||
| 96 | printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " | ||
| 97 | "installed\n", | ||
| 98 | cpu, (h & APIC_VECTOR_MASK)); | ||
| 99 | return; /* -EBUSY */ | ||
| 100 | } | ||
| 101 | |||
| 102 | /* The temperature transition interrupt handler setup */ | ||
| 103 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ | ||
| 104 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ | ||
| 105 | apic_write(APIC_LVTTHMR, h); | ||
| 106 | |||
| 107 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | ||
| 108 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); | ||
| 109 | |||
| 110 | /* ok we're good to go... */ | ||
| 111 | vendor_thermal_interrupt = intel_thermal_interrupt; | 71 | vendor_thermal_interrupt = intel_thermal_interrupt; |
| 112 | |||
| 113 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
| 114 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | ||
| 115 | |||
| 116 | l = apic_read(APIC_LVTTHMR); | ||
| 117 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
| 118 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | ||
| 119 | |||
| 120 | /* enable thermal throttle processing */ | ||
| 121 | atomic_set(&therm_throt_en, 1); | ||
| 122 | return; | ||
| 123 | } | 72 | } |
| 124 | #endif /* CONFIG_X86_MCE_P4THERMAL */ | ||
| 125 | 73 | ||
| 74 | #endif /* CONFIG_X86_MCE_P4THERMAL */ | ||
| 126 | 75 | ||
| 127 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | 76 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ |
| 128 | static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | 77 | static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) |
| 129 | { | 78 | { |
| 130 | u32 h; | 79 | u32 h; |
| 131 | 80 | ||
| @@ -143,9 +92,9 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | |||
| 143 | 92 | ||
| 144 | static void intel_machine_check(struct pt_regs *regs, long error_code) | 93 | static void intel_machine_check(struct pt_regs *regs, long error_code) |
| 145 | { | 94 | { |
| 146 | int recover = 1; | ||
| 147 | u32 alow, ahigh, high, low; | 95 | u32 alow, ahigh, high, low; |
| 148 | u32 mcgstl, mcgsth; | 96 | u32 mcgstl, mcgsth; |
| 97 | int recover = 1; | ||
| 149 | int i; | 98 | int i; |
| 150 | 99 | ||
| 151 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 100 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
| @@ -157,7 +106,9 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
| 157 | 106 | ||
| 158 | if (mce_num_extended_msrs > 0) { | 107 | if (mce_num_extended_msrs > 0) { |
| 159 | struct intel_mce_extended_msrs dbg; | 108 | struct intel_mce_extended_msrs dbg; |
| 109 | |||
| 160 | intel_get_extended_msrs(&dbg); | 110 | intel_get_extended_msrs(&dbg); |
| 111 | |||
| 161 | printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" | 112 | printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" |
| 162 | "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" | 113 | "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" |
| 163 | "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", | 114 | "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", |
| @@ -171,6 +122,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
| 171 | if (high & (1<<31)) { | 122 | if (high & (1<<31)) { |
| 172 | char misc[20]; | 123 | char misc[20]; |
| 173 | char addr[24]; | 124 | char addr[24]; |
| 125 | |||
| 174 | misc[0] = addr[0] = '\0'; | 126 | misc[0] = addr[0] = '\0'; |
| 175 | if (high & (1<<29)) | 127 | if (high & (1<<29)) |
| 176 | recover |= 1; | 128 | recover |= 1; |
| @@ -196,6 +148,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
| 196 | panic("Unable to continue"); | 148 | panic("Unable to continue"); |
| 197 | 149 | ||
| 198 | printk(KERN_EMERG "Attempting to continue.\n"); | 150 | printk(KERN_EMERG "Attempting to continue.\n"); |
| 151 | |||
| 199 | /* | 152 | /* |
| 200 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | 153 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not |
| 201 | * recoverable/continuable. This will allow BIOS to look at the MSRs | 154 | * recoverable/continuable. This will allow BIOS to look at the MSRs |
| @@ -217,7 +170,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
| 217 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 170 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
| 218 | } | 171 | } |
| 219 | 172 | ||
| 220 | |||
| 221 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c) | 173 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c) |
| 222 | { | 174 | { |
| 223 | u32 l, h; | 175 | u32 l, h; |
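intel_machine_check() in p4.c (and its near-twin in p6.c below) decodes the high word of each IA32_MCi_STATUS register: bit 31 is the valid bit, bit 29 marks an uncorrected error, bits 27 and 26 say whether MCi_MISC and MCi_ADDR hold data, and bit 25 (processor context corrupt) drives the unrecoverable path. A standalone decode sketch using those positions; they are taken to match the architectural layout, so verify against the SDM before relying on them:

#include <stdio.h>

/* high 32 bits of IA32_MCi_STATUS, as tested by the handler above */
#define STATUS_VAL   (1u << 31)	/* bit 63: entry valid */
#define STATUS_UC    (1u << 29)	/* bit 61: uncorrected error */
#define STATUS_MISCV (1u << 27)	/* bit 59: MCi_MISC valid */
#define STATUS_ADDRV (1u << 26)	/* bit 58: MCi_ADDR valid */
#define STATUS_PCC   (1u << 25)	/* bit 57: processor context corrupt */

static void decode(unsigned int high)
{
	if (!(high & STATUS_VAL))
		return;		/* nothing logged in this bank */

	printf("valid error:%s%s%s%s\n",
	       (high & STATUS_UC)    ? " uncorrected" : " corrected",
	       (high & STATUS_PCC)   ? " context-corrupt" : "",
	       (high & STATUS_MISCV) ? " +misc" : "",
	       (high & STATUS_ADDRV) ? " +addr" : "");
}

int main(void)
{
	decode(STATUS_VAL | STATUS_UC | STATUS_ADDRV);
	decode(STATUS_VAL);	/* corrected event, no extra registers */
	return 0;
}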
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index c9f77ea69edc..015f481ab1b0 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
| @@ -2,11 +2,10 @@ | |||
| 2 | * P5 specific Machine Check Exception Reporting | 2 | * P5 specific Machine Check Exception Reporting |
| 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> | 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> |
| 4 | */ | 4 | */ |
| 5 | |||
| 6 | #include <linux/init.h> | ||
| 7 | #include <linux/types.h> | ||
| 8 | #include <linux/kernel.h> | ||
| 9 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
| 6 | #include <linux/kernel.h> | ||
| 7 | #include <linux/types.h> | ||
| 8 | #include <linux/init.h> | ||
| 10 | #include <linux/smp.h> | 9 | #include <linux/smp.h> |
| 11 | 10 | ||
| 12 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
| @@ -15,39 +14,58 @@ | |||
| 15 | 14 | ||
| 16 | #include "mce.h" | 15 | #include "mce.h" |
| 17 | 16 | ||
| 18 | /* Machine check handler for Pentium class Intel */ | 17 | /* By default disabled */ |
| 18 | int mce_p5_enable; | ||
| 19 | |||
| 20 | /* Machine check handler for Pentium class Intel CPUs: */ | ||
| 19 | static void pentium_machine_check(struct pt_regs *regs, long error_code) | 21 | static void pentium_machine_check(struct pt_regs *regs, long error_code) |
| 20 | { | 22 | { |
| 21 | u32 loaddr, hi, lotype; | 23 | u32 loaddr, hi, lotype; |
| 24 | |||
| 22 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); | 25 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); |
| 23 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); | 26 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); |
| 24 | printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); | 27 | |
| 25 | if (lotype&(1<<5)) | 28 | printk(KERN_EMERG |
| 26 | printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); | 29 | "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", |
| 30 | smp_processor_id(), loaddr, lotype); | ||
| 31 | |||
| 32 | if (lotype & (1<<5)) { | ||
| 33 | printk(KERN_EMERG | ||
| 34 | "CPU#%d: Possible thermal failure (CPU on fire ?).\n", | ||
| 35 | smp_processor_id()); | ||
| 36 | } | ||
| 37 | |||
| 27 | add_taint(TAINT_MACHINE_CHECK); | 38 | add_taint(TAINT_MACHINE_CHECK); |
| 28 | } | 39 | } |
| 29 | 40 | ||
| 30 | /* Set up machine check reporting for processors with Intel style MCE */ | 41 | /* Set up machine check reporting for processors with Intel style MCE: */ |
| 31 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c) | 42 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c) |
| 32 | { | 43 | { |
| 33 | u32 l, h; | 44 | u32 l, h; |
| 34 | 45 | ||
| 35 | /*Check for MCE support */ | 46 | /* Check for MCE support: */ |
| 36 | if (!cpu_has(c, X86_FEATURE_MCE)) | 47 | if (!cpu_has(c, X86_FEATURE_MCE)) |
| 37 | return; | 48 | return; |
| 38 | 49 | ||
| 39 | /* Default P5 to off as its often misconnected */ | 50 | #ifdef CONFIG_X86_OLD_MCE |
| 51 | /* Default P5 to off as its often misconnected: */ | ||
| 40 | if (mce_disabled != -1) | 52 | if (mce_disabled != -1) |
| 41 | return; | 53 | return; |
| 54 | #endif | ||
| 55 | |||
| 42 | machine_check_vector = pentium_machine_check; | 56 | machine_check_vector = pentium_machine_check; |
| 57 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
| 43 | wmb(); | 58 | wmb(); |
| 44 | 59 | ||
| 45 | /* Read registers before enabling */ | 60 | /* Read registers before enabling: */ |
| 46 | rdmsr(MSR_IA32_P5_MC_ADDR, l, h); | 61 | rdmsr(MSR_IA32_P5_MC_ADDR, l, h); |
| 47 | rdmsr(MSR_IA32_P5_MC_TYPE, l, h); | 62 | rdmsr(MSR_IA32_P5_MC_TYPE, l, h); |
| 48 | printk(KERN_INFO "Intel old style machine check architecture supported.\n"); | 63 | printk(KERN_INFO |
| 64 | "Intel old style machine check architecture supported.\n"); | ||
| 49 | 65 | ||
| 50 | /* Enable MCE */ | 66 | /* Enable MCE: */ |
| 51 | set_in_cr4(X86_CR4_MCE); | 67 | set_in_cr4(X86_CR4_MCE); |
| 52 | printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); | 68 | printk(KERN_INFO |
| 69 | "Intel old style machine check reporting enabled on CPU#%d.\n", | ||
| 70 | smp_processor_id()); | ||
| 53 | } | 71 | } |
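The P5 handler has far less state to work with than the P4/P6 ones: just MSR_IA32_P5_MC_ADDR and MSR_IA32_P5_MC_TYPE, with bit 5 of the type word flagging a possible thermal failure. A tiny model of the reporting logic in pentium_machine_check() above (userspace sketch, canned values):

#include <stdio.h>

static void p5_report(unsigned int addr, unsigned int type)
{
	printf("Machine Check Exception: 0x%8X (type 0x%8X)\n", addr, type);
	if (type & (1u << 5))
		printf("Possible thermal failure (CPU on fire ?)\n");
}

int main(void)
{
	p5_report(0x00001000, 1u << 5);
	return 0;
}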
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index 2ac52d7b434b..43c24e667457 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c | |||
| @@ -2,11 +2,10 @@ | |||
| 2 | * P6 specific Machine Check Exception Reporting | 2 | * P6 specific Machine Check Exception Reporting |
| 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> | 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> |
| 4 | */ | 4 | */ |
| 5 | |||
| 6 | #include <linux/init.h> | ||
| 7 | #include <linux/types.h> | ||
| 8 | #include <linux/kernel.h> | ||
| 9 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
| 6 | #include <linux/kernel.h> | ||
| 7 | #include <linux/types.h> | ||
| 8 | #include <linux/init.h> | ||
| 10 | #include <linux/smp.h> | 9 | #include <linux/smp.h> |
| 11 | 10 | ||
| 12 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
| @@ -18,9 +17,9 @@ | |||
| 18 | /* Machine Check Handler For PII/PIII */ | 17 | /* Machine Check Handler For PII/PIII */ |
| 19 | static void intel_machine_check(struct pt_regs *regs, long error_code) | 18 | static void intel_machine_check(struct pt_regs *regs, long error_code) |
| 20 | { | 19 | { |
| 21 | int recover = 1; | ||
| 22 | u32 alow, ahigh, high, low; | 20 | u32 alow, ahigh, high, low; |
| 23 | u32 mcgstl, mcgsth; | 21 | u32 mcgstl, mcgsth; |
| 22 | int recover = 1; | ||
| 24 | int i; | 23 | int i; |
| 25 | 24 | ||
| 26 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 25 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
| @@ -35,12 +34,16 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
| 35 | if (high & (1<<31)) { | 34 | if (high & (1<<31)) { |
| 36 | char misc[20]; | 35 | char misc[20]; |
| 37 | char addr[24]; | 36 | char addr[24]; |
| 38 | misc[0] = addr[0] = '\0'; | 37 | |
| 38 | misc[0] = '\0'; | ||
| 39 | addr[0] = '\0'; | ||
| 40 | |||
| 39 | if (high & (1<<29)) | 41 | if (high & (1<<29)) |
| 40 | recover |= 1; | 42 | recover |= 1; |
| 41 | if (high & (1<<25)) | 43 | if (high & (1<<25)) |
| 42 | recover |= 2; | 44 | recover |= 2; |
| 43 | high &= ~(1<<31); | 45 | high &= ~(1<<31); |
| 46 | |||
| 44 | if (high & (1<<27)) { | 47 | if (high & (1<<27)) { |
| 45 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | 48 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); |
| 46 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | 49 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); |
| @@ -49,6 +52,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
| 49 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | 52 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); |
| 50 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | 53 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); |
| 51 | } | 54 | } |
| 55 | |||
| 52 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | 56 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", |
| 53 | smp_processor_id(), i, high, low, misc, addr); | 57 | smp_processor_id(), i, high, low, misc, addr); |
| 54 | } | 58 | } |
| @@ -63,16 +67,17 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
| 63 | /* | 67 | /* |
| 64 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | 68 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not |
| 65 | * recoverable/continuable. This will allow BIOS to look at the MSRs | 69 | * recoverable/continuable. This will allow BIOS to look at the MSRs |
| 66 | * for errors if the OS could not log the error. | 70 | * for errors if the OS could not log the error: |
| 67 | */ | 71 | */ |
| 68 | for (i = 0; i < nr_mce_banks; i++) { | 72 | for (i = 0; i < nr_mce_banks; i++) { |
| 69 | unsigned int msr; | 73 | unsigned int msr; |
| 74 | |||
| 70 | msr = MSR_IA32_MC0_STATUS+i*4; | 75 | msr = MSR_IA32_MC0_STATUS+i*4; |
| 71 | rdmsr(msr, low, high); | 76 | rdmsr(msr, low, high); |
| 72 | if (high & (1<<31)) { | 77 | if (high & (1<<31)) { |
| 73 | /* Clear it */ | 78 | /* Clear it: */ |
| 74 | wrmsr(msr, 0UL, 0UL); | 79 | wrmsr(msr, 0UL, 0UL); |
| 75 | /* Serialize */ | 80 | /* Serialize: */ |
| 76 | wmb(); | 81 | wmb(); |
| 77 | add_taint(TAINT_MACHINE_CHECK); | 82 | add_taint(TAINT_MACHINE_CHECK); |
| 78 | } | 83 | } |
| @@ -81,7 +86,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
| 81 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 86 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
| 82 | } | 87 | } |
| 83 | 88 | ||
| 84 | /* Set up machine check reporting for processors with Intel style MCE */ | 89 | /* Set up machine check reporting for processors with Intel style MCE: */ |
| 85 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c) | 90 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c) |
| 86 | { | 91 | { |
| 87 | u32 l, h; | 92 | u32 l, h; |
| @@ -97,6 +102,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c) | |||
| 97 | 102 | ||
| 98 | /* Ok machine check is available */ | 103 | /* Ok machine check is available */ |
| 99 | machine_check_vector = intel_machine_check; | 104 | machine_check_vector = intel_machine_check; |
| 105 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
| 100 | wmb(); | 106 | wmb(); |
| 101 | 107 | ||
| 102 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | 108 | printk(KERN_INFO "Intel machine check architecture supported.\n"); |
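Both p5.c and p6.c now spell out why the wmb() sits between the vector assignment and enabling machine checks in CR4: the handler pointer must be globally visible before the first machine check can dispatch through it. A userspace analogue of that publish-then-enable ordering using C11 release/acquire semantics (the kernel relies on wmb() plus the CR4 write instead; this is only a model):

#include <stdatomic.h>
#include <stdio.h>

static void default_handler(long code) { (void)code; }
static void real_handler(long code) { printf("mce, code %ld\n", code); }

/* the vector other CPUs dispatch through; starts at a safe default */
static _Atomic(void (*)(long)) machine_check_vector = default_handler;

int main(void)
{
	/* publish the handler *before* "enabling" delivery */
	atomic_store_explicit(&machine_check_vector, real_handler,
			      memory_order_release);

	/* ...delivery enabled here; a consumer now sees the new pointer */
	void (*v)(long) = atomic_load_explicit(&machine_check_vector,
					       memory_order_acquire);
	v(42);
	return 0;
}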
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index d5ae2243f0b9..7b1ae2e20ba5 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
| @@ -1,7 +1,7 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * | ||
| 3 | * Thermal throttle event support code (such as syslog messaging and rate | 2 | * Thermal throttle event support code (such as syslog messaging and rate |
| 4 | * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). | 3 | * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). |
| 4 | * | ||
| 5 | * This allows consistent reporting of CPU thermal throttle events. | 5 | * This allows consistent reporting of CPU thermal throttle events. |
| 6 | * | 6 | * |
| 7 | * Maintains a counter in /sys that keeps track of the number of thermal | 7 | * Maintains a counter in /sys that keeps track of the number of thermal |
| @@ -13,43 +13,43 @@ | |||
| 13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. | 13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. |
| 14 | * Inspired by Ross Biro's and Al Borchers' counter code. | 14 | * Inspired by Ross Biro's and Al Borchers' counter code. |
| 15 | */ | 15 | */ |
| 16 | 16 | #include <linux/notifier.h> | |
| 17 | #include <linux/jiffies.h> | ||
| 17 | #include <linux/percpu.h> | 18 | #include <linux/percpu.h> |
| 18 | #include <linux/sysdev.h> | 19 | #include <linux/sysdev.h> |
| 19 | #include <linux/cpu.h> | 20 | #include <linux/cpu.h> |
| 20 | #include <asm/cpu.h> | 21 | |
| 21 | #include <linux/notifier.h> | ||
| 22 | #include <linux/jiffies.h> | ||
| 23 | #include <asm/therm_throt.h> | 22 | #include <asm/therm_throt.h> |
| 24 | 23 | ||
| 25 | /* How long to wait between reporting thermal events */ | 24 | /* How long to wait between reporting thermal events */ |
| 26 | #define CHECK_INTERVAL (300 * HZ) | 25 | #define CHECK_INTERVAL (300 * HZ) |
| 27 | 26 | ||
| 28 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; | 27 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; |
| 29 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); | 28 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); |
| 30 | atomic_t therm_throt_en = ATOMIC_INIT(0); | 29 | |
| 30 | atomic_t therm_throt_en = ATOMIC_INIT(0); | ||
| 31 | 31 | ||
| 32 | #ifdef CONFIG_SYSFS | 32 | #ifdef CONFIG_SYSFS |
| 33 | #define define_therm_throt_sysdev_one_ro(_name) \ | 33 | #define define_therm_throt_sysdev_one_ro(_name) \ |
| 34 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 34 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) |
| 35 | 35 | ||
| 36 | #define define_therm_throt_sysdev_show_func(name) \ | 36 | #define define_therm_throt_sysdev_show_func(name) \ |
| 37 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ | 37 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ |
| 38 | struct sysdev_attribute *attr, \ | 38 | struct sysdev_attribute *attr, \ |
| 39 | char *buf) \ | 39 | char *buf) \ |
| 40 | { \ | 40 | { \ |
| 41 | unsigned int cpu = dev->id; \ | 41 | unsigned int cpu = dev->id; \ |
| 42 | ssize_t ret; \ | 42 | ssize_t ret; \ |
| 43 | \ | 43 | \ |
| 44 | preempt_disable(); /* CPU hotplug */ \ | 44 | preempt_disable(); /* CPU hotplug */ \ |
| 45 | if (cpu_online(cpu)) \ | 45 | if (cpu_online(cpu)) \ |
| 46 | ret = sprintf(buf, "%lu\n", \ | 46 | ret = sprintf(buf, "%lu\n", \ |
| 47 | per_cpu(thermal_throttle_##name, cpu)); \ | 47 | per_cpu(thermal_throttle_##name, cpu)); \ |
| 48 | else \ | 48 | else \ |
| 49 | ret = 0; \ | 49 | ret = 0; \ |
| 50 | preempt_enable(); \ | 50 | preempt_enable(); \ |
| 51 | \ | 51 | \ |
| 52 | return ret; \ | 52 | return ret; \ |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | define_therm_throt_sysdev_show_func(count); | 55 | define_therm_throt_sysdev_show_func(count); |
| @@ -61,8 +61,8 @@ static struct attribute *thermal_throttle_attrs[] = { | |||
| 61 | }; | 61 | }; |
| 62 | 62 | ||
| 63 | static struct attribute_group thermal_throttle_attr_group = { | 63 | static struct attribute_group thermal_throttle_attr_group = { |
| 64 | .attrs = thermal_throttle_attrs, | 64 | .attrs = thermal_throttle_attrs, |
| 65 | .name = "thermal_throttle" | 65 | .name = "thermal_throttle" |
| 66 | }; | 66 | }; |
| 67 | #endif /* CONFIG_SYSFS */ | 67 | #endif /* CONFIG_SYSFS */ |
| 68 | 68 | ||
| @@ -110,10 +110,11 @@ int therm_throt_process(int curr) | |||
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | #ifdef CONFIG_SYSFS | 112 | #ifdef CONFIG_SYSFS |
| 113 | /* Add/Remove thermal_throttle interface for CPU device */ | 113 | /* Add/Remove thermal_throttle interface for CPU device: */ |
| 114 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) | 114 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) |
| 115 | { | 115 | { |
| 116 | return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); | 116 | return sysfs_create_group(&sys_dev->kobj, |
| 117 | &thermal_throttle_attr_group); | ||
| 117 | } | 118 | } |
| 118 | 119 | ||
| 119 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | 120 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) |
| @@ -121,19 +122,21 @@ static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | |||
| 121 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); | 122 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); |
| 122 | } | 123 | } |
| 123 | 124 | ||
| 124 | /* Mutex protecting device creation against CPU hotplug */ | 125 | /* Mutex protecting device creation against CPU hotplug: */ |
| 125 | static DEFINE_MUTEX(therm_cpu_lock); | 126 | static DEFINE_MUTEX(therm_cpu_lock); |
| 126 | 127 | ||
| 127 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | 128 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ |
| 128 | static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, | 129 | static __cpuinit int |
| 129 | unsigned long action, | 130 | thermal_throttle_cpu_callback(struct notifier_block *nfb, |
| 130 | void *hcpu) | 131 | unsigned long action, |
| 132 | void *hcpu) | ||
| 131 | { | 133 | { |
| 132 | unsigned int cpu = (unsigned long)hcpu; | 134 | unsigned int cpu = (unsigned long)hcpu; |
| 133 | struct sys_device *sys_dev; | 135 | struct sys_device *sys_dev; |
| 134 | int err = 0; | 136 | int err = 0; |
| 135 | 137 | ||
| 136 | sys_dev = get_cpu_sysdev(cpu); | 138 | sys_dev = get_cpu_sysdev(cpu); |
| 139 | |||
| 137 | switch (action) { | 140 | switch (action) { |
| 138 | case CPU_UP_PREPARE: | 141 | case CPU_UP_PREPARE: |
| 139 | case CPU_UP_PREPARE_FROZEN: | 142 | case CPU_UP_PREPARE_FROZEN: |
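For readers tracing the sysfs plumbing in this file: each define_therm_throt_sysdev_show_func(name) invocation generates one per-CPU counter accessor. As a sketch, the single "count" instance expands to roughly the following (derived mechanically from the macro shown above; only whitespace is editorial):

/* Sketch: preprocessor expansion of define_therm_throt_sysdev_show_func(count). */
static ssize_t therm_throt_sysdev_show_count(struct sys_device *dev,
					     struct sysdev_attribute *attr,
					     char *buf)
{
	unsigned int cpu = dev->id;
	ssize_t ret;

	preempt_disable();	/* CPU hotplug: keep this CPU from going away */
	if (cpu_online(cpu))
		ret = sprintf(buf, "%lu\n",
			      per_cpu(thermal_throttle_count, cpu));
	else
		ret = 0;
	preempt_enable();

	return ret;
}

Together with the attribute group named "thermal_throttle", this is what userspace reads as /sys/devices/system/cpu/cpuN/thermal_throttle/count.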
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 23ee9e730f78..d746df2909c9 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c | |||
| @@ -17,7 +17,7 @@ static void default_threshold_interrupt(void) | |||
| 17 | 17 | ||
| 18 | void (*mce_threshold_vector)(void) = default_threshold_interrupt; | 18 | void (*mce_threshold_vector)(void) = default_threshold_interrupt; |
| 19 | 19 | ||
| 20 | asmlinkage void mce_threshold_interrupt(void) | 20 | asmlinkage void smp_threshold_interrupt(void) |
| 21 | { | 21 | { |
| 22 | exit_idle(); | 22 | exit_idle(); |
| 23 | irq_enter(); | 23 | irq_enter(); |
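The rename above keeps the existing indirection intact: the low-level entry stub calls smp_threshold_interrupt(), which jumps through the mce_threshold_vector function pointer. CPU-specific code overrides the pointer, not the entry point. A hedged sketch (the handler name is borrowed from the AMD bank driver; its body here is illustrative only):

/* Default is a do-nothing logger; a bank driver installs the real handler. */
static void amd_threshold_interrupt(void)
{
	/* scan the per-bank threshold counters, log any overflow ... */
}

static void threshold_setup_sketch(void)
{
	mce_threshold_vector = amd_threshold_interrupt;
}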
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 2a043d89811d..81b02487090b 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
| @@ -2,11 +2,10 @@ | |||
| 2 | * IDT Winchip specific Machine Check Exception Reporting | 2 | * IDT Winchip specific Machine Check Exception Reporting |
| 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> | 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> |
| 4 | */ | 4 | */ |
| 5 | |||
| 6 | #include <linux/init.h> | ||
| 7 | #include <linux/types.h> | ||
| 8 | #include <linux/kernel.h> | ||
| 9 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
| 6 | #include <linux/kernel.h> | ||
| 7 | #include <linux/types.h> | ||
| 8 | #include <linux/init.h> | ||
| 10 | 9 | ||
| 11 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
| 12 | #include <asm/system.h> | 11 | #include <asm/system.h> |
| @@ -14,7 +13,7 @@ | |||
| 14 | 13 | ||
| 15 | #include "mce.h" | 14 | #include "mce.h" |
| 16 | 15 | ||
| 17 | /* Machine check handler for WinChip C6 */ | 16 | /* Machine check handler for WinChip C6: */ |
| 18 | static void winchip_machine_check(struct pt_regs *regs, long error_code) | 17 | static void winchip_machine_check(struct pt_regs *regs, long error_code) |
| 19 | { | 18 | { |
| 20 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); | 19 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); |
| @@ -25,12 +24,18 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code) | |||
| 25 | void winchip_mcheck_init(struct cpuinfo_x86 *c) | 24 | void winchip_mcheck_init(struct cpuinfo_x86 *c) |
| 26 | { | 25 | { |
| 27 | u32 lo, hi; | 26 | u32 lo, hi; |
| 27 | |||
| 28 | machine_check_vector = winchip_machine_check; | 28 | machine_check_vector = winchip_machine_check; |
| 29 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
| 29 | wmb(); | 30 | wmb(); |
| 31 | |||
| 30 | rdmsr(MSR_IDT_FCR1, lo, hi); | 32 | rdmsr(MSR_IDT_FCR1, lo, hi); |
| 31 | lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */ | 33 | lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */ |
| 32 | lo &= ~(1<<4); /* Enable MCE */ | 34 | lo &= ~(1<<4); /* Enable MCE */ |
| 33 | wrmsr(MSR_IDT_FCR1, lo, hi); | 35 | wrmsr(MSR_IDT_FCR1, lo, hi); |
| 36 | |||
| 34 | set_in_cr4(X86_CR4_MCE); | 37 | set_in_cr4(X86_CR4_MCE); |
| 35 | printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); | 38 | |
| 39 | printk(KERN_INFO | ||
| 40 | "Winchip machine check reporting enabled on CPU#0.\n"); | ||
| 36 | } | 41 | } |
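winchip_mcheck_init() is a textbook read-modify-write of a model-specific register, now bracketed by a wmb() so the machine_check_vector store is globally visible before CR4.MCE can deliver an exception. The RMW pattern, factored into a helper for illustration (msr_set_clear is a hypothetical name, not a kernel API of this vintage):

/* Illustrative only: the rdmsr/wrmsr pattern open-coded above. */
static void msr_set_clear(u32 msr, u32 set_bits, u32 clear_bits)
{
	u32 lo, hi;

	rdmsr(msr, lo, hi);	/* read the current value into lo:hi */
	lo |= set_bits;		/* e.g. 1<<2 enables EIERRINT (int 18 MCE) */
	lo &= ~clear_bits;	/* e.g. clearing 1<<4 enables MCE */
	wrmsr(msr, lo, hi);	/* write the modified value back */
}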
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a4742a340d8d..de74f0a3e0ed 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -963,6 +963,8 @@ END(\sym) | |||
| 963 | #ifdef CONFIG_SMP | 963 | #ifdef CONFIG_SMP |
| 964 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ | 964 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ |
| 965 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt | 965 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt |
| 966 | apicinterrupt REBOOT_VECTOR \ | ||
| 967 | reboot_interrupt smp_reboot_interrupt | ||
| 966 | #endif | 968 | #endif |
| 967 | 969 | ||
| 968 | #ifdef CONFIG_X86_UV | 970 | #ifdef CONFIG_X86_UV |
| @@ -994,10 +996,15 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ | |||
| 994 | #endif | 996 | #endif |
| 995 | 997 | ||
| 996 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 998 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
| 997 | threshold_interrupt mce_threshold_interrupt | 999 | threshold_interrupt smp_threshold_interrupt |
| 998 | apicinterrupt THERMAL_APIC_VECTOR \ | 1000 | apicinterrupt THERMAL_APIC_VECTOR \ |
| 999 | thermal_interrupt smp_thermal_interrupt | 1001 | thermal_interrupt smp_thermal_interrupt |
| 1000 | 1002 | ||
| 1003 | #ifdef CONFIG_X86_MCE | ||
| 1004 | apicinterrupt MCE_SELF_VECTOR \ | ||
| 1005 | mce_self_interrupt smp_mce_self_interrupt | ||
| 1006 | #endif | ||
| 1007 | |||
| 1001 | #ifdef CONFIG_SMP | 1008 | #ifdef CONFIG_SMP |
| 1002 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ | 1009 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ |
| 1003 | call_function_single_interrupt smp_call_function_single_interrupt | 1010 | call_function_single_interrupt smp_call_function_single_interrupt |
| @@ -1379,7 +1386,7 @@ errorentry xen_stack_segment do_stack_segment | |||
| 1379 | errorentry general_protection do_general_protection | 1386 | errorentry general_protection do_general_protection |
| 1380 | errorentry page_fault do_page_fault | 1387 | errorentry page_fault do_page_fault |
| 1381 | #ifdef CONFIG_X86_MCE | 1388 | #ifdef CONFIG_X86_MCE |
| 1382 | paranoidzeroentry machine_check do_machine_check | 1389 | paranoidzeroentry machine_check *machine_check_vector(%rip) |
| 1383 | #endif | 1390 | #endif |
| 1384 | 1391 | ||
| 1385 | /* | 1392 | /* |
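The paranoidzeroentry change is central to unifying the 32-bit and 64-bit machine-check paths: instead of hard-wiring do_machine_check, the stub now makes a RIP-relative indirect call through machine_check_vector, so whichever handler the CPU setup installed (do_machine_check, winchip_machine_check, ...) gets control. On the C side the pointer looks roughly like this (a sketch; the exact declaration lives in the mcheck code, and unexpected_machine_check is the traditional catch-all default):

/* Sketch of the dispatch pointer the entry stub calls through. */
void (*machine_check_vector)(struct pt_regs *, long error_code) =
	unexpected_machine_check;	/* default until an init routine overrides it */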
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 38287b5f116e..b0cdde6932f5 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <asm/io_apic.h> | 12 | #include <asm/io_apic.h> |
| 13 | #include <asm/irq.h> | 13 | #include <asm/irq.h> |
| 14 | #include <asm/idle.h> | 14 | #include <asm/idle.h> |
| 15 | #include <asm/mce.h> | ||
| 15 | #include <asm/hw_irq.h> | 16 | #include <asm/hw_irq.h> |
| 16 | 17 | ||
| 17 | atomic_t irq_err_count; | 18 | atomic_t irq_err_count; |
| @@ -96,13 +97,23 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
| 96 | for_each_online_cpu(j) | 97 | for_each_online_cpu(j) |
| 97 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); | 98 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); |
| 98 | seq_printf(p, " Thermal event interrupts\n"); | 99 | seq_printf(p, " Thermal event interrupts\n"); |
| 99 | # ifdef CONFIG_X86_64 | 100 | # ifdef CONFIG_X86_MCE_THRESHOLD |
| 100 | seq_printf(p, "%*s: ", prec, "THR"); | 101 | seq_printf(p, "%*s: ", prec, "THR"); |
| 101 | for_each_online_cpu(j) | 102 | for_each_online_cpu(j) |
| 102 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); | 103 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); |
| 103 | seq_printf(p, " Threshold APIC interrupts\n"); | 104 | seq_printf(p, " Threshold APIC interrupts\n"); |
| 104 | # endif | 105 | # endif |
| 105 | #endif | 106 | #endif |
| 107 | #ifdef CONFIG_X86_NEW_MCE | ||
| 108 | seq_printf(p, "%*s: ", prec, "MCE"); | ||
| 109 | for_each_online_cpu(j) | ||
| 110 | seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); | ||
| 111 | seq_printf(p, " Machine check exceptions\n"); | ||
| 112 | seq_printf(p, "%*s: ", prec, "MCP"); | ||
| 113 | for_each_online_cpu(j) | ||
| 114 | seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); | ||
| 115 | seq_printf(p, " Machine check polls\n"); | ||
| 116 | #endif | ||
| 106 | seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); | 117 | seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); |
| 107 | #if defined(CONFIG_X86_IO_APIC) | 118 | #if defined(CONFIG_X86_IO_APIC) |
| 108 | seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); | 119 | seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); |
| @@ -185,10 +196,14 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
| 185 | #endif | 196 | #endif |
| 186 | #ifdef CONFIG_X86_MCE | 197 | #ifdef CONFIG_X86_MCE |
| 187 | sum += irq_stats(cpu)->irq_thermal_count; | 198 | sum += irq_stats(cpu)->irq_thermal_count; |
| 188 | # ifdef CONFIG_X86_64 | 199 | # ifdef CONFIG_X86_MCE_THRESHOLD |
| 189 | sum += irq_stats(cpu)->irq_threshold_count; | 200 | sum += irq_stats(cpu)->irq_threshold_count; |
| 190 | # endif | 201 | # endif |
| 191 | #endif | 202 | #endif |
| 203 | #ifdef CONFIG_X86_NEW_MCE | ||
| 204 | sum += per_cpu(mce_exception_count, cpu); | ||
| 205 | sum += per_cpu(mce_poll_count, cpu); | ||
| 206 | #endif | ||
| 192 | return sum; | 207 | return sum; |
| 193 | } | 208 | } |
| 194 | 209 | ||
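With the two per-CPU counters wired into show_other_interrupts() and arch_irq_stat_cpu(), /proc/interrupts grows MCE and MCP rows. An illustrative excerpt for a two-CPU machine, following the seq_printf format strings above (the counts are invented):

            CPU0       CPU1
 MCE:          0          0   Machine check exceptions
 MCP:        214        213   Machine check polls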
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 267c6624c77f..696f0e475c2d 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
| @@ -173,6 +173,9 @@ static void __init smp_intr_init(void) | |||
| 173 | /* Low priority IPI to cleanup after moving an irq */ | 173 | /* Low priority IPI to cleanup after moving an irq */ |
| 174 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 174 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
| 175 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | 175 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); |
| 176 | |||
| 177 | /* IPI used for rebooting/stopping */ | ||
| 178 | alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); | ||
| 176 | #endif | 179 | #endif |
| 177 | #endif /* CONFIG_SMP */ | 180 | #endif /* CONFIG_SMP */ |
| 178 | } | 181 | } |
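Unlike the set_intr_gate()/set_bit() pair used for IRQ_MOVE_CLEANUP_VECTOR just above, alloc_intr_gate() claims the vector and installs the gate in one step. A sketch of what it amounts to (modeled on the desc.h helpers of this era; treat the exact body as an assumption):

/* Sketch: roughly what alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt) does. */
static inline void alloc_intr_gate_sketch(unsigned int n, void *addr)
{
	BUG_ON(test_and_set_bit(n, used_vectors));	/* claim the vector, once */
	set_intr_gate(n, addr);				/* install the IDT entry */
}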
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 0a813b17b172..4c578751e94e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
| @@ -24,11 +24,11 @@ | |||
| 24 | #include <asm/ucontext.h> | 24 | #include <asm/ucontext.h> |
| 25 | #include <asm/i387.h> | 25 | #include <asm/i387.h> |
| 26 | #include <asm/vdso.h> | 26 | #include <asm/vdso.h> |
| 27 | #include <asm/mce.h> | ||
| 27 | 28 | ||
| 28 | #ifdef CONFIG_X86_64 | 29 | #ifdef CONFIG_X86_64 |
| 29 | #include <asm/proto.h> | 30 | #include <asm/proto.h> |
| 30 | #include <asm/ia32_unistd.h> | 31 | #include <asm/ia32_unistd.h> |
| 31 | #include <asm/mce.h> | ||
| 32 | #endif /* CONFIG_X86_64 */ | 32 | #endif /* CONFIG_X86_64 */ |
| 33 | 33 | ||
| 34 | #include <asm/syscall.h> | 34 | #include <asm/syscall.h> |
| @@ -856,10 +856,10 @@ static void do_signal(struct pt_regs *regs) | |||
| 856 | void | 856 | void |
| 857 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 857 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
| 858 | { | 858 | { |
| 859 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) | 859 | #ifdef CONFIG_X86_NEW_MCE |
| 860 | /* notify userspace of pending MCEs */ | 860 | /* notify userspace of pending MCEs */ |
| 861 | if (thread_info_flags & _TIF_MCE_NOTIFY) | 861 | if (thread_info_flags & _TIF_MCE_NOTIFY) |
| 862 | mce_notify_user(); | 862 | mce_notify_process(); |
| 863 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ | 863 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ |
| 864 | 864 | ||
| 865 | /* deal with pending signal delivery */ | 865 | /* deal with pending signal delivery */ |
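The handshake behind this hunk: the machine-check handler runs in an exception context where it cannot safely complete user notification, so it raises _TIF_MCE_NOTIFY and lets the return-to-userspace path finish the work via mce_notify_process(). A hedged sketch of the producer side (the flag and function names come from the diff; the surrounding body is illustrative):

/* Sketch: inside do_machine_check(), after queueing the error record. */
static void mce_defer_notify_sketch(void)
{
	set_thread_flag(TIF_MCE_NOTIFY);	/* mark work for exit-to-user */
	/* do_notify_resume() later sees _TIF_MCE_NOTIFY and calls
	 * mce_notify_process() in a context where signals may be sent. */
}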
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 28f5fb495a66..ec1de97600e7 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
| @@ -150,14 +150,40 @@ void native_send_call_func_ipi(const struct cpumask *mask) | |||
| 150 | * this function calls the 'stop' function on all other CPUs in the system. | 150 | * this function calls the 'stop' function on all other CPUs in the system. |
| 151 | */ | 151 | */ |
| 152 | 152 | ||
| 153 | asmlinkage void smp_reboot_interrupt(void) | ||
| 154 | { | ||
| 155 | ack_APIC_irq(); | ||
| 156 | irq_enter(); | ||
| 157 | stop_this_cpu(NULL); | ||
| 158 | irq_exit(); | ||
| 159 | } | ||
| 160 | |||
| 153 | static void native_smp_send_stop(void) | 161 | static void native_smp_send_stop(void) |
| 154 | { | 162 | { |
| 155 | unsigned long flags; | 163 | unsigned long flags; |
| 164 | unsigned long wait; | ||
| 156 | 165 | ||
| 157 | if (reboot_force) | 166 | if (reboot_force) |
| 158 | return; | 167 | return; |
| 159 | 168 | ||
| 160 | smp_call_function(stop_this_cpu, NULL, 0); | 169 | /* |
| 170 | * Use our own vector here because smp_call_function | ||
| 171 | * does lots of things not suitable in a panic situation. | ||
| 172 | * On most systems we could also use an NMI here, | ||
| 173 | * but there are a few systems around where NMI | ||
| 174 | * is problematic, so stay with a non-NMI approach for now | ||
| 175 | * (this implies we currently cannot stop CPUs | ||
| 176 | * spinning with irqs off). | ||
| 177 | */ | ||
| 178 | if (num_online_cpus() > 1) { | ||
| 179 | apic->send_IPI_allbutself(REBOOT_VECTOR); | ||
| 180 | |||
| 181 | /* Don't wait longer than a second */ | ||
| 182 | wait = USEC_PER_SEC; | ||
| 183 | while (num_online_cpus() > 1 && wait--) | ||
| 184 | udelay(1); | ||
| 185 | } | ||
| 186 | |||
| 161 | local_irq_save(flags); | 187 | local_irq_save(flags); |
| 162 | disable_local_APIC(); | 188 | disable_local_APIC(); |
| 163 | local_irq_restore(flags); | 189 | local_irq_restore(flags); |
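The bounded wait above spins at most USEC_PER_SEC iterations of udelay(1), i.e. roughly one second, before giving up on CPUs that never took the IPI. A typical consumer of this path is the halt/panic code, which reaches native_smp_send_stop() through the smp_ops hook; an illustrative caller (a sketch, not the actual reboot code):

/* Sketch: how the stop IPI is typically driven from a halt path. */
static void halt_sketch(void)
{
	local_irq_disable();
	smp_send_stop();	/* sends REBOOT_VECTOR to all other CPUs */
	for (;;)
		halt();		/* park this CPU */
}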
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 07d60c870ce2..1e1e27b7d438 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -798,15 +798,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) | |||
| 798 | 798 | ||
| 799 | return new_kesp; | 799 | return new_kesp; |
| 800 | } | 800 | } |
| 801 | #else | 801 | #endif |
| 802 | |||
| 802 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | 803 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) |
| 803 | { | 804 | { |
| 804 | } | 805 | } |
| 805 | 806 | ||
| 806 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | 807 | asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) |
| 807 | { | 808 | { |
| 808 | } | 809 | } |
| 809 | #endif | ||
| 810 | 810 | ||
| 811 | /* | 811 | /* |
| 812 | * 'math_state_restore()' saves the current math information in the | 812 | * 'math_state_restore()' saves the current math information in the |
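Moving the stubs out of the #else branch makes them ordinary weak symbols: every configuration now links a do-nothing default, and any object file that defines a real smp_thermal_interrupt() or smp_threshold_interrupt() overrides it at link time. The pattern in miniature:

/* Weak default: chosen only if no strong definition exists elsewhere. */
asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
{
}

/*
 * A strong definition, e.g. the one in mcheck/threshold.c, wins at link
 * time without any #ifdef:
 *
 *	asmlinkage void smp_threshold_interrupt(void) { ... }
 */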
diff --git a/kernel/timer.c b/kernel/timer.c index c01e568935ea..faf2db897de4 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -757,6 +757,7 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
| 757 | wake_up_idle_cpu(cpu); | 757 | wake_up_idle_cpu(cpu); |
| 758 | spin_unlock_irqrestore(&base->lock, flags); | 758 | spin_unlock_irqrestore(&base->lock, flags); |
| 759 | } | 759 | } |
| 760 | EXPORT_SYMBOL_GPL(add_timer_on); | ||
| 760 | 761 | ||
| 761 | /** | 762 | /** |
| 762 | * del_timer - deactivate a timer. | 763 | * del_timer - deactivate a timer. |

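add_timer_on() queues a timer on a specific CPU's timer base; the new EXPORT_SYMBOL_GPL simply makes it available to modules as well. A minimal usage sketch (poll_fn, start_poll_on, and the one-second interval are invented for illustration):

/* Sketch: arming a poll timer on a chosen CPU (names are invented). */
#include <linux/timer.h>
#include <linux/jiffies.h>
#include <linux/smp.h>

static struct timer_list poll_timer;

static void poll_fn(unsigned long data)
{
	/* ... inspect per-CPU hardware state here ... */
	poll_timer.expires = jiffies + HZ;
	add_timer_on(&poll_timer, smp_processor_id());	/* re-arm locally */
}

static void start_poll_on(int cpu)
{
	setup_timer(&poll_timer, poll_fn, 0);
	poll_timer.expires = jiffies + HZ;
	add_timer_on(&poll_timer, cpu);	/* fire on that CPU's timer base */
}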