about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> 2012-09-27 13:08:00 -0400
committer Tony Luck <tony.luck@intel.com> 2012-09-27 13:08:00 -0400
commit 450cc201038f31bd496e1b3a44a49790b8827a06 (patch)
tree 254dd5a157702dad656ac37815fb346df94f8d8d
parent 961ebea4ae68075bb5a0acc19f5852bed82bb877 (diff)
x86/mce: Provide boot argument to honour bios-set CMCI threshold
The ACPI spec doesn't provide a way for the BIOS to pass down recommended thresholds to the OS on a _per-bank_ basis. This patch adds a new boot option which, if passed, tells Linux to use the CMCI thresholds set by the BIOS. As a fail-safe, we initialize the threshold to 1 if some banks have not been initialized by the BIOS, and warn the user. Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- Documentation/x86/x86_64/boot-options.txt 7
-rw-r--r-- arch/x86/include/asm/mce.h 1
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 10
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce_intel.c 35
4 files changed, 50 insertions, 3 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index c54b4f503e2a..de38429beb71 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -50,6 +50,13 @@ Machine check
50 monarchtimeout: 50 monarchtimeout:
51 Sets the time in us to wait for other CPUs on machine checks. 0 51 Sets the time in us to wait for other CPUs on machine checks. 0
52 to disable. 52 to disable.
53 mce=bios_cmci_threshold
54 Don't overwrite the bios-set CMCI threshold. This boot option
55 prevents Linux from overwriting the CMCI threshold set by the
56 bios. Without this option, Linux always sets the CMCI
57 threshold to 1. Enabling this may make memory predictive failure
58 analysis less effective if the bios sets thresholds for memory
59 errors since we will not see details for all errors.
53 60
54 nomce (for compatibility with i386): same as mce=off 61 nomce (for compatibility with i386): same as mce=off
55 62
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ccaf7c581c8f..54d73b1f00a0 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -161,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device);
161#ifdef CONFIG_X86_MCE_INTEL 161#ifdef CONFIG_X86_MCE_INTEL
162extern int mce_cmci_disabled; 162extern int mce_cmci_disabled;
163extern int mce_ignore_ce; 163extern int mce_ignore_ce;
164extern int mce_bios_cmci_threshold;
164void mce_intel_feature_init(struct cpuinfo_x86 *c); 165void mce_intel_feature_init(struct cpuinfo_x86 *c);
165void cmci_clear(void); 166void cmci_clear(void);
166void cmci_reenable(void); 167void cmci_reenable(void);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index c311122ea838..29e87d3b2843 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
83int mce_cmci_disabled __read_mostly; 83int mce_cmci_disabled __read_mostly;
84int mce_ignore_ce __read_mostly; 84int mce_ignore_ce __read_mostly;
85int mce_ser __read_mostly; 85int mce_ser __read_mostly;
86int mce_bios_cmci_threshold __read_mostly;
86 87
87struct mce_bank *mce_banks __read_mostly; 88struct mce_bank *mce_banks __read_mostly;
88 89
@@ -1946,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
1946 * check, or 0 to not wait 1947 * check, or 0 to not wait
1947 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. 1948 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
1948 * mce=nobootlog Don't log MCEs from before booting. 1949 * mce=nobootlog Don't log MCEs from before booting.
1950 * mce=bios_cmci_threshold Don't program the CMCI threshold
1949 */ 1951 */
1950static int __init mcheck_enable(char *str) 1952static int __init mcheck_enable(char *str)
1951{ 1953{
@@ -1965,6 +1967,8 @@ static int __init mcheck_enable(char *str)
1965 mce_ignore_ce = 1; 1967 mce_ignore_ce = 1;
1966 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) 1968 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
1967 mce_bootlog = (str[0] == 'b'); 1969 mce_bootlog = (str[0] == 'b');
1970 else if (!strcmp(str, "bios_cmci_threshold"))
1971 mce_bios_cmci_threshold = 1;
1968 else if (isdigit(str[0])) { 1972 else if (isdigit(str[0])) {
1969 get_option(&str, &tolerant); 1973 get_option(&str, &tolerant);
1970 if (*str == ',') { 1974 if (*str == ',') {
@@ -2205,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
2205 &mce_cmci_disabled 2209 &mce_cmci_disabled
2206}; 2210};
2207 2211
2212static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
2213 __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
2214 &mce_bios_cmci_threshold
2215};
2216
2208static struct device_attribute *mce_device_attrs[] = { 2217static struct device_attribute *mce_device_attrs[] = {
2209 &dev_attr_tolerant.attr, 2218 &dev_attr_tolerant.attr,
2210 &dev_attr_check_interval.attr, 2219 &dev_attr_check_interval.attr,
@@ -2213,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
2213 &dev_attr_dont_log_ce.attr, 2222 &dev_attr_dont_log_ce.attr,
2214 &dev_attr_ignore_ce.attr, 2223 &dev_attr_ignore_ce.attr,
2215 &dev_attr_cmci_disabled.attr, 2224 &dev_attr_cmci_disabled.attr,
2225 &dev_attr_bios_cmci_threshold.attr,
2216 NULL 2226 NULL
2217}; 2227};
2218 2228
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 098386fed48e..5f88abf07e9c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -181,10 +181,12 @@ static void cmci_discover(int banks)
181 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); 181 unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
182 unsigned long flags; 182 unsigned long flags;
183 int i; 183 int i;
184 int bios_wrong_thresh = 0;
184 185
185 raw_spin_lock_irqsave(&cmci_discover_lock, flags); 186 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
186 for (i = 0; i < banks; i++) { 187 for (i = 0; i < banks; i++) {
187 u64 val; 188 u64 val;
189 int bios_zero_thresh = 0;
188 190
189 if (test_bit(i, owned)) 191 if (test_bit(i, owned))
190 continue; 192 continue;
@@ -198,8 +200,20 @@ static void cmci_discover(int banks)
198 continue; 200 continue;
199 } 201 }
200 202
201 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; 203 if (!mce_bios_cmci_threshold) {
202 val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; 204 val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
205 val |= CMCI_THRESHOLD;
206 } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
207 /*
208 * If bios_cmci_threshold boot option was specified
209 * but the threshold is zero, we'll try to initialize
210 * it to 1.
211 */
212 bios_zero_thresh = 1;
213 val |= CMCI_THRESHOLD;
214 }
215
216 val |= MCI_CTL2_CMCI_EN;
203 wrmsrl(MSR_IA32_MCx_CTL2(i), val); 217 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
204 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 218 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
205 219
@@ -207,11 +221,26 @@ static void cmci_discover(int banks)
207 if (val & MCI_CTL2_CMCI_EN) { 221 if (val & MCI_CTL2_CMCI_EN) {
208 set_bit(i, owned); 222 set_bit(i, owned);
209 __clear_bit(i, __get_cpu_var(mce_poll_banks)); 223 __clear_bit(i, __get_cpu_var(mce_poll_banks));
224 /*
225 * We are able to set thresholds for some banks that
226 * had a threshold of 0. This means the BIOS has not
227 * set the thresholds properly or does not work with
228 * this boot option. Note down now and report later.
229 */
230 if (mce_bios_cmci_threshold && bios_zero_thresh &&
231 (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
232 bios_wrong_thresh = 1;
210 } else { 233 } else {
211 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); 234 WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
212 } 235 }
213 } 236 }
214 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 237 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
238 if (mce_bios_cmci_threshold && bios_wrong_thresh) {
239 pr_info_once(
240 "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
241 pr_info_once(
242 "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
243 }
215} 244}
216 245
217/* 246/*
@@ -249,7 +278,7 @@ void cmci_clear(void)
249 continue; 278 continue;
250 /* Disable CMCI */ 279 /* Disable CMCI */
251 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 280 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
252 val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); 281 val &= ~MCI_CTL2_CMCI_EN;
253 wrmsrl(MSR_IA32_MCx_CTL2(i), val); 282 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
254 __clear_bit(i, __get_cpu_var(mce_banks_owned)); 283 __clear_bit(i, __get_cpu_var(mce_banks_owned));
255 } 284 }