diff options
-rw-r--r-- | Documentation/x86/x86_64/boot-options.txt | 36 | ||||
-rw-r--r-- | arch/x86/include/asm/mce.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 19 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 3 |
4 files changed, 52 insertions, 8 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 0ee5e3b212f3..fa2bed07d21e 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt | |||
@@ -7,12 +7,36 @@ Machine check | |||
7 | 7 | ||
8 | Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables. | 8 | Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables. |
9 | 9 | ||
10 | mce=off disable machine check | 10 | mce=off |
11 | mce=bootlog Enable logging of machine checks left over from booting. | 11 | Disable machine check |
12 | Disabled by default on AMD because some BIOS leave bogus ones. | 12 | mce=no_cmci |
13 | If your BIOS doesn't do that it's a good idea to enable though | 13 | Disable CMCI (Corrected Machine Check Interrupt) that |
14 | to make sure you log even machine check events that result | 14 | Intel processors support. Usually this disablement is |
15 | in a reboot. On Intel systems it is enabled by default. | 15 | not recommended, but it might be handy if your hardware |
16 | is misbehaving. | ||
17 | Note that you'll get more problems without CMCI than with it, | ||
18 | due to the shared banks, i.e. you might get duplicated | ||
19 | error logs. | ||
20 | mce=dont_log_ce | ||
21 | Don't make logs for corrected errors. All events reported | ||
22 | as corrected are silently cleared by OS. | ||
23 | This option will be useful if you have no interest in any | ||
24 | of the corrected errors. | ||
25 | mce=ignore_ce | ||
26 | Disable features for corrected errors, e.g. polling timer | ||
27 | and CMCI. All events reported as corrected are not cleared | ||
28 | by the OS and remain in their error banks. | ||
29 | Usually this disablement is not recommended, however if | ||
30 | there is an agent checking/clearing corrected errors | ||
31 | (e.g. BIOS or hardware monitoring applications), conflicting | ||
32 | with OS's error handling, and you cannot deactivate the agent, | ||
33 | then this option will be a help. | ||
34 | mce=bootlog | ||
35 | Enable logging of machine checks left over from booting. | ||
36 | Disabled by default on AMD because some BIOS leave bogus ones. | ||
37 | If your BIOS doesn't do that it's a good idea to enable though | ||
38 | to make sure you log even machine check events that result | ||
39 | in a reboot. On Intel systems it is enabled by default. | ||
16 | mce=nobootlog | 40 | mce=nobootlog |
17 | Disable boot machine check logging. | 41 | Disable boot machine check logging. |
18 | mce=tolerancelevel[,monarchtimeout] (number,number) | 42 | mce=tolerancelevel[,monarchtimeout] (number,number) |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 82978ad12072..540a466e50f5 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -119,6 +119,8 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | |||
119 | #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) | 119 | #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) |
120 | 120 | ||
121 | #ifdef CONFIG_X86_MCE_INTEL | 121 | #ifdef CONFIG_X86_MCE_INTEL |
122 | extern int mce_cmci_disabled; | ||
123 | extern int mce_ignore_ce; | ||
122 | void mce_intel_feature_init(struct cpuinfo_x86 *c); | 124 | void mce_intel_feature_init(struct cpuinfo_x86 *c); |
123 | void cmci_clear(void); | 125 | void cmci_clear(void); |
124 | void cmci_reenable(void); | 126 | void cmci_reenable(void); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 6a3127ecb5cc..fabba15e4558 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -84,6 +84,9 @@ static int rip_msr; | |||
84 | static int mce_bootlog = -1; | 84 | static int mce_bootlog = -1; |
85 | static int monarch_timeout = -1; | 85 | static int monarch_timeout = -1; |
86 | static int mce_panic_timeout; | 86 | static int mce_panic_timeout; |
87 | static int mce_dont_log_ce; | ||
88 | int mce_cmci_disabled; | ||
89 | int mce_ignore_ce; | ||
87 | int mce_ser; | 90 | int mce_ser; |
88 | 91 | ||
89 | static char trigger[128]; | 92 | static char trigger[128]; |
@@ -526,7 +529,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
526 | * Don't get the IP here because it's unlikely to | 529 | * Don't get the IP here because it's unlikely to |
527 | * have anything to do with the actual error location. | 530 | * have anything to do with the actual error location. |
528 | */ | 531 | */ |
529 | if (!(flags & MCP_DONTLOG)) { | 532 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { |
530 | mce_log(&m); | 533 | mce_log(&m); |
531 | add_taint(TAINT_MACHINE_CHECK); | 534 | add_taint(TAINT_MACHINE_CHECK); |
532 | } | 535 | } |
@@ -1307,6 +1310,9 @@ static void mce_init_timer(void) | |||
1307 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1310 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1308 | int *n = &__get_cpu_var(next_interval); | 1311 | int *n = &__get_cpu_var(next_interval); |
1309 | 1312 | ||
1313 | if (mce_ignore_ce) | ||
1314 | return; | ||
1315 | |||
1310 | *n = check_interval * HZ; | 1316 | *n = check_interval * HZ; |
1311 | if (!*n) | 1317 | if (!*n) |
1312 | return; | 1318 | return; |
@@ -1517,7 +1523,10 @@ static struct miscdevice mce_log_device = { | |||
1517 | }; | 1523 | }; |
1518 | 1524 | ||
1519 | /* | 1525 | /* |
1520 | * mce=off disables machine check | 1526 | * mce=off Disables machine check |
1527 | * mce=no_cmci Disables CMCI | ||
1528 | * mce=dont_log_ce Clears corrected events silently, no log created for CEs. | ||
1529 | * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. | ||
1521 | * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) | 1530 | * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) |
1522 | * monarchtimeout is how long to wait for other CPUs on machine | 1531 | * monarchtimeout is how long to wait for other CPUs on machine |
1523 | * check, or 0 to not wait | 1532 | * check, or 0 to not wait |
@@ -1532,6 +1541,12 @@ static int __init mcheck_enable(char *str) | |||
1532 | str++; | 1541 | str++; |
1533 | if (!strcmp(str, "off")) | 1542 | if (!strcmp(str, "off")) |
1534 | mce_disabled = 1; | 1543 | mce_disabled = 1; |
1544 | else if (!strcmp(str, "no_cmci")) | ||
1545 | mce_cmci_disabled = 1; | ||
1546 | else if (!strcmp(str, "dont_log_ce")) | ||
1547 | mce_dont_log_ce = 1; | ||
1548 | else if (!strcmp(str, "ignore_ce")) | ||
1549 | mce_ignore_ce = 1; | ||
1535 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) | 1550 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) |
1536 | mce_bootlog = (str[0] == 'b'); | 1551 | mce_bootlog = (str[0] == 'b'); |
1537 | else if (isdigit(str[0])) { | 1552 | else if (isdigit(str[0])) { |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index b7c5a2470b40..046087e9808f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c | |||
@@ -57,6 +57,9 @@ static int cmci_supported(int *banks) | |||
57 | { | 57 | { |
58 | u64 cap; | 58 | u64 cap; |
59 | 59 | ||
60 | if (mce_cmci_disabled || mce_ignore_ce) | ||
61 | return 0; | ||
62 | |||
60 | /* | 63 | /* |
61 | * Vendor check is not strictly needed, but the initial | 64 | * Vendor check is not strictly needed, but the initial |
62 | * initialization is vendor keyed and this | 65 | * initialization is vendor keyed and this |