diff options
author | Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> | 2012-09-27 13:08:00 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2012-09-27 13:08:00 -0400 |
commit | 450cc201038f31bd496e1b3a44a49790b8827a06 (patch) | |
tree | 254dd5a157702dad656ac37815fb346df94f8d8d | |
parent | 961ebea4ae68075bb5a0acc19f5852bed82bb877 (diff) |
x86/mce: Provide boot argument to honour bios-set CMCI threshold
The ACPI spec doesn't provide a way for the BIOS to pass down
recommended thresholds to the OS on a _per-bank_ basis. This patch adds
a new boot option which, if passed, tells Linux to use the CMCI
thresholds set by the BIOS.
As a fail-safe, we initialize the threshold to 1 for any bank whose
threshold has not been initialized by the BIOS, and warn the user.
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- | Documentation/x86/x86_64/boot-options.txt | 7 | ||||
-rw-r--r-- | arch/x86/include/asm/mce.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 35 |
4 files changed, 50 insertions, 3 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index c54b4f503e2a..de38429beb71 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt | |||
@@ -50,6 +50,13 @@ Machine check | |||
50 | monarchtimeout: | 50 | monarchtimeout: |
51 | Sets the time in us to wait for other CPUs on machine checks. 0 | 51 | Sets the time in us to wait for other CPUs on machine checks. 0 |
52 | to disable. | 52 | to disable. |
53 | mce=bios_cmci_threshold | ||
54 | Don't overwrite the bios-set CMCI threshold. This boot option | ||
55 | prevents Linux from overwriting the CMCI threshold set by the | ||
56 | bios. Without this option, Linux always sets the CMCI | ||
57 | threshold to 1. Enabling this may make memory predictive failure | ||
58 | analysis less effective if the bios sets thresholds for memory | ||
59 | errors since we will not see details for all errors. | ||
53 | 60 | ||
54 | nomce (for compatibility with i386): same as mce=off | 61 | nomce (for compatibility with i386): same as mce=off |
55 | 62 | ||
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ccaf7c581c8f..54d73b1f00a0 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -161,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device); | |||
161 | #ifdef CONFIG_X86_MCE_INTEL | 161 | #ifdef CONFIG_X86_MCE_INTEL |
162 | extern int mce_cmci_disabled; | 162 | extern int mce_cmci_disabled; |
163 | extern int mce_ignore_ce; | 163 | extern int mce_ignore_ce; |
164 | extern int mce_bios_cmci_threshold; | ||
164 | void mce_intel_feature_init(struct cpuinfo_x86 *c); | 165 | void mce_intel_feature_init(struct cpuinfo_x86 *c); |
165 | void cmci_clear(void); | 166 | void cmci_clear(void); |
166 | void cmci_reenable(void); | 167 | void cmci_reenable(void); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index c311122ea838..29e87d3b2843 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly; | |||
83 | int mce_cmci_disabled __read_mostly; | 83 | int mce_cmci_disabled __read_mostly; |
84 | int mce_ignore_ce __read_mostly; | 84 | int mce_ignore_ce __read_mostly; |
85 | int mce_ser __read_mostly; | 85 | int mce_ser __read_mostly; |
86 | int mce_bios_cmci_threshold __read_mostly; | ||
86 | 87 | ||
87 | struct mce_bank *mce_banks __read_mostly; | 88 | struct mce_bank *mce_banks __read_mostly; |
88 | 89 | ||
@@ -1946,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = { | |||
1946 | * check, or 0 to not wait | 1947 | * check, or 0 to not wait |
1947 | * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. | 1948 | * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. |
1948 | * mce=nobootlog Don't log MCEs from before booting. | 1949 | * mce=nobootlog Don't log MCEs from before booting. |
1950 | * mce=bios_cmci_threshold Don't program the CMCI threshold | ||
1949 | */ | 1951 | */ |
1950 | static int __init mcheck_enable(char *str) | 1952 | static int __init mcheck_enable(char *str) |
1951 | { | 1953 | { |
@@ -1965,6 +1967,8 @@ static int __init mcheck_enable(char *str) | |||
1965 | mce_ignore_ce = 1; | 1967 | mce_ignore_ce = 1; |
1966 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) | 1968 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) |
1967 | mce_bootlog = (str[0] == 'b'); | 1969 | mce_bootlog = (str[0] == 'b'); |
1970 | else if (!strcmp(str, "bios_cmci_threshold")) | ||
1971 | mce_bios_cmci_threshold = 1; | ||
1968 | else if (isdigit(str[0])) { | 1972 | else if (isdigit(str[0])) { |
1969 | get_option(&str, &tolerant); | 1973 | get_option(&str, &tolerant); |
1970 | if (*str == ',') { | 1974 | if (*str == ',') { |
@@ -2205,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = { | |||
2205 | &mce_cmci_disabled | 2209 | &mce_cmci_disabled |
2206 | }; | 2210 | }; |
2207 | 2211 | ||
2212 | static struct dev_ext_attribute dev_attr_bios_cmci_threshold = { | ||
2213 | __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL), | ||
2214 | &mce_bios_cmci_threshold | ||
2215 | }; | ||
2216 | |||
2208 | static struct device_attribute *mce_device_attrs[] = { | 2217 | static struct device_attribute *mce_device_attrs[] = { |
2209 | &dev_attr_tolerant.attr, | 2218 | &dev_attr_tolerant.attr, |
2210 | &dev_attr_check_interval.attr, | 2219 | &dev_attr_check_interval.attr, |
@@ -2213,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = { | |||
2213 | &dev_attr_dont_log_ce.attr, | 2222 | &dev_attr_dont_log_ce.attr, |
2214 | &dev_attr_ignore_ce.attr, | 2223 | &dev_attr_ignore_ce.attr, |
2215 | &dev_attr_cmci_disabled.attr, | 2224 | &dev_attr_cmci_disabled.attr, |
2225 | &dev_attr_bios_cmci_threshold.attr, | ||
2216 | NULL | 2226 | NULL |
2217 | }; | 2227 | }; |
2218 | 2228 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 098386fed48e..5f88abf07e9c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -181,10 +181,12 @@ static void cmci_discover(int banks) | |||
181 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); | 181 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); |
182 | unsigned long flags; | 182 | unsigned long flags; |
183 | int i; | 183 | int i; |
184 | int bios_wrong_thresh = 0; | ||
184 | 185 | ||
185 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); | 186 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); |
186 | for (i = 0; i < banks; i++) { | 187 | for (i = 0; i < banks; i++) { |
187 | u64 val; | 188 | u64 val; |
189 | int bios_zero_thresh = 0; | ||
188 | 190 | ||
189 | if (test_bit(i, owned)) | 191 | if (test_bit(i, owned)) |
190 | continue; | 192 | continue; |
@@ -198,8 +200,20 @@ static void cmci_discover(int banks) | |||
198 | continue; | 200 | continue; |
199 | } | 201 | } |
200 | 202 | ||
201 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; | 203 | if (!mce_bios_cmci_threshold) { |
202 | val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; | 204 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; |
205 | val |= CMCI_THRESHOLD; | ||
206 | } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { | ||
207 | /* | ||
208 | * If bios_cmci_threshold boot option was specified | ||
209 | * but the threshold is zero, we'll try to initialize | ||
210 | * it to 1. | ||
211 | */ | ||
212 | bios_zero_thresh = 1; | ||
213 | val |= CMCI_THRESHOLD; | ||
214 | } | ||
215 | |||
216 | val |= MCI_CTL2_CMCI_EN; | ||
203 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 217 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
204 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 218 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
205 | 219 | ||
@@ -207,11 +221,26 @@ static void cmci_discover(int banks) | |||
207 | if (val & MCI_CTL2_CMCI_EN) { | 221 | if (val & MCI_CTL2_CMCI_EN) { |
208 | set_bit(i, owned); | 222 | set_bit(i, owned); |
209 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 223 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
224 | /* | ||
225 | * We are able to set thresholds for some banks that | ||
226 | * had a threshold of 0. This means the BIOS has not | ||
227 | * set the thresholds properly or does not work with | ||
228 | * this boot option. Note down now and report later. | ||
229 | */ | ||
230 | if (mce_bios_cmci_threshold && bios_zero_thresh && | ||
231 | (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) | ||
232 | bios_wrong_thresh = 1; | ||
210 | } else { | 233 | } else { |
211 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | 234 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); |
212 | } | 235 | } |
213 | } | 236 | } |
214 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | 237 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); |
238 | if (mce_bios_cmci_threshold && bios_wrong_thresh) { | ||
239 | pr_info_once( | ||
240 | "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); | ||
241 | pr_info_once( | ||
242 | "bios_cmci_threshold: Make sure your BIOS supports this boot option\n"); | ||
243 | } | ||
215 | } | 244 | } |
216 | 245 | ||
217 | /* | 246 | /* |
@@ -249,7 +278,7 @@ void cmci_clear(void) | |||
249 | continue; | 278 | continue; |
250 | /* Disable CMCI */ | 279 | /* Disable CMCI */ |
251 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 280 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
252 | val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); | 281 | val &= ~MCI_CTL2_CMCI_EN; |
253 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 282 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
254 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 283 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
255 | } | 284 | } |