aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>2009-06-11 03:06:07 -0400
committerIngo Molnar <mingo@elte.hu>2009-06-11 05:42:18 -0400
commit62fdac5913f71f8f200bd2c9bd59a02e9a1498e9 (patch)
tree9ba4cce91bf53198a295ca7909b027ca17b637fa
parent77e26cca20013e9352a8df86a54640543304a23a (diff)
x86, mce: Add boot options for corrected errors
This patch introduces three boot options (no_cmci, dont_log_ce and ignore_ce) to control the handling of corrected errors. The "mce=no_cmci" boot option disables the CMCI feature. Since CMCI is a new feature, having a boot control to disable it will help if the hardware is misbehaving. The "mce=dont_log_ce" boot option disables logging for corrected errors. All reported corrected errors will be cleared silently. This option will be useful if you never care about corrected errors. The "mce=ignore_ce" boot option disables features for corrected errors, i.e. the polling timer and CMCI. Corrected events are not cleared and are kept in the bank MSRs. Disabling these features is usually not recommended; however, it will help if there is some conflict with an agent such as the BIOS or a hardware monitoring application that clears corrected events in the banks instead of the OS. [ A trivial cleanup (space -> tab) of the doc is also included. ] Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Reviewed-by: Andi Kleen <ak@linux.intel.com> LKML-Reference: <4A30ACDF.5030408@jp.fujitsu.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--Documentation/x86/x86_64/boot-options.txt36
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c19
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c3
4 files changed, 52 insertions, 8 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 0ee5e3b212f3..fa2bed07d21e 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -7,12 +7,36 @@ Machine check
7 7
8 Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables. 8 Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
9 9
10 mce=off disable machine check 10 mce=off
11 mce=bootlog Enable logging of machine checks left over from booting. 11 Disable machine check
12 Disabled by default on AMD because some BIOS leave bogus ones. 12 mce=no_cmci
13 If your BIOS doesn't do that it's a good idea to enable though 13 Disable CMCI(Corrected Machine Check Interrupt) that
14 to make sure you log even machine check events that result 14 Intel processor supports. Usually this disablement is
15 in a reboot. On Intel systems it is enabled by default. 15 not recommended, but it might be handy if your hardware
16 is misbehaving.
17 Note that you'll get more problems without CMCI than with
18 due to the shared banks, i.e. you might get duplicated
19 error logs.
20 mce=dont_log_ce
21 Don't make logs for corrected errors. All events reported
22 as corrected are silently cleared by OS.
23 This option will be useful if you have no interest in any
24 of corrected errors.
25 mce=ignore_ce
26 Disable features for corrected errors, e.g. polling timer
27 and CMCI. All events reported as corrected are not cleared
28 by OS and remain in their error banks.
29 Usually this disablement is not recommended, however if
30 there is an agent checking/clearing corrected errors
31 (e.g. BIOS or hardware monitoring applications), conflicting
32 with OS's error handling, and you cannot deactivate the agent,
33 then this option will be a help.
34 mce=bootlog
35 Enable logging of machine checks left over from booting.
36 Disabled by default on AMD because some BIOS leave bogus ones.
37 If your BIOS doesn't do that it's a good idea to enable though
38 to make sure you log even machine check events that result
39 in a reboot. On Intel systems it is enabled by default.
16 mce=nobootlog 40 mce=nobootlog
17 Disable boot machine check logging. 41 Disable boot machine check logging.
18 mce=tolerancelevel[,monarchtimeout] (number,number) 42 mce=tolerancelevel[,monarchtimeout] (number,number)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 82978ad12072..540a466e50f5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -119,6 +119,8 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
119#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) 119#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
120 120
121#ifdef CONFIG_X86_MCE_INTEL 121#ifdef CONFIG_X86_MCE_INTEL
122extern int mce_cmci_disabled;
123extern int mce_ignore_ce;
122void mce_intel_feature_init(struct cpuinfo_x86 *c); 124void mce_intel_feature_init(struct cpuinfo_x86 *c);
123void cmci_clear(void); 125void cmci_clear(void);
124void cmci_reenable(void); 126void cmci_reenable(void);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 6a3127ecb5cc..fabba15e4558 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -84,6 +84,9 @@ static int rip_msr;
84static int mce_bootlog = -1; 84static int mce_bootlog = -1;
85static int monarch_timeout = -1; 85static int monarch_timeout = -1;
86static int mce_panic_timeout; 86static int mce_panic_timeout;
87static int mce_dont_log_ce;
88int mce_cmci_disabled;
89int mce_ignore_ce;
87int mce_ser; 90int mce_ser;
88 91
89static char trigger[128]; 92static char trigger[128];
@@ -526,7 +529,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
526 * Don't get the IP here because it's unlikely to 529 * Don't get the IP here because it's unlikely to
527 * have anything to do with the actual error location. 530 * have anything to do with the actual error location.
528 */ 531 */
529 if (!(flags & MCP_DONTLOG)) { 532 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {
530 mce_log(&m); 533 mce_log(&m);
531 add_taint(TAINT_MACHINE_CHECK); 534 add_taint(TAINT_MACHINE_CHECK);
532 } 535 }
@@ -1307,6 +1310,9 @@ static void mce_init_timer(void)
1307 struct timer_list *t = &__get_cpu_var(mce_timer); 1310 struct timer_list *t = &__get_cpu_var(mce_timer);
1308 int *n = &__get_cpu_var(next_interval); 1311 int *n = &__get_cpu_var(next_interval);
1309 1312
1313 if (mce_ignore_ce)
1314 return;
1315
1310 *n = check_interval * HZ; 1316 *n = check_interval * HZ;
1311 if (!*n) 1317 if (!*n)
1312 return; 1318 return;
@@ -1517,7 +1523,10 @@ static struct miscdevice mce_log_device = {
1517}; 1523};
1518 1524
1519/* 1525/*
1520 * mce=off disables machine check 1526 * mce=off Disables machine check
1527 * mce=no_cmci Disables CMCI
1528 * mce=dont_log_ce Clears corrected events silently, no log created for CEs.
1529 * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared.
1521 * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) 1530 * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
1522 * monarchtimeout is how long to wait for other CPUs on machine 1531 * monarchtimeout is how long to wait for other CPUs on machine
1523 * check, or 0 to not wait 1532 * check, or 0 to not wait
@@ -1532,6 +1541,12 @@ static int __init mcheck_enable(char *str)
1532 str++; 1541 str++;
1533 if (!strcmp(str, "off")) 1542 if (!strcmp(str, "off"))
1534 mce_disabled = 1; 1543 mce_disabled = 1;
1544 else if (!strcmp(str, "no_cmci"))
1545 mce_cmci_disabled = 1;
1546 else if (!strcmp(str, "dont_log_ce"))
1547 mce_dont_log_ce = 1;
1548 else if (!strcmp(str, "ignore_ce"))
1549 mce_ignore_ce = 1;
1535 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) 1550 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
1536 mce_bootlog = (str[0] == 'b'); 1551 mce_bootlog = (str[0] == 'b');
1537 else if (isdigit(str[0])) { 1552 else if (isdigit(str[0])) {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index b7c5a2470b40..046087e9808f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -57,6 +57,9 @@ static int cmci_supported(int *banks)
57{ 57{
58 u64 cap; 58 u64 cap;
59 59
60 if (mce_cmci_disabled || mce_ignore_ce)
61 return 0;
62
60 /* 63 /*
61 * Vendor check is not strictly needed, but the initial 64 * Vendor check is not strictly needed, but the initial
62 * initialization is vendor keyed and this 65 * initialization is vendor keyed and this