diff options
| author | Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> | 2012-09-27 13:08:00 -0400 |
|---|---|---|
| committer | Tony Luck <tony.luck@intel.com> | 2012-09-27 13:08:00 -0400 |
| commit | 450cc201038f31bd496e1b3a44a49790b8827a06 (patch) | |
| tree | 254dd5a157702dad656ac37815fb346df94f8d8d | |
| parent | 961ebea4ae68075bb5a0acc19f5852bed82bb877 (diff) | |
x86/mce: Provide boot argument to honour bios-set CMCI threshold
The ACPI spec doesn't provide a way for the bios to pass down
recommended thresholds to the OS on a _per-bank_ basis. This patch adds
a new boot option, which if passed, tells Linux to use CMCI thresholds
set by the bios.
As a fail-safe, we initialize the threshold to 1 for any bank that has not
been initialized by the bios, and warn the user.
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
| -rw-r--r-- | Documentation/x86/x86_64/boot-options.txt | 7 | ||||
| -rw-r--r-- | arch/x86/include/asm/mce.h | 1 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 10 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 35 |
4 files changed, 50 insertions, 3 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index c54b4f503e2a..de38429beb71 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt | |||
| @@ -50,6 +50,13 @@ Machine check | |||
| 50 | monarchtimeout: | 50 | monarchtimeout: |
| 51 | Sets the time in us to wait for other CPUs on machine checks. 0 | 51 | Sets the time in us to wait for other CPUs on machine checks. 0 |
| 52 | to disable. | 52 | to disable. |
| 53 | mce=bios_cmci_threshold | ||
| 54 | Don't overwrite the bios-set CMCI threshold. This boot option | ||
| 55 | prevents Linux from overwriting the CMCI threshold set by the | ||
| 56 | bios. Without this option, Linux always sets the CMCI | ||
| 57 | threshold to 1. Enabling this may make memory predictive failure | ||
| 58 | analysis less effective if the bios sets thresholds for memory | ||
| 59 | errors since we will not see details for all errors. | ||
| 53 | 60 | ||
| 54 | nomce (for compatibility with i386): same as mce=off | 61 | nomce (for compatibility with i386): same as mce=off |
| 55 | 62 | ||
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ccaf7c581c8f..54d73b1f00a0 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
| @@ -161,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device); | |||
| 161 | #ifdef CONFIG_X86_MCE_INTEL | 161 | #ifdef CONFIG_X86_MCE_INTEL |
| 162 | extern int mce_cmci_disabled; | 162 | extern int mce_cmci_disabled; |
| 163 | extern int mce_ignore_ce; | 163 | extern int mce_ignore_ce; |
| 164 | extern int mce_bios_cmci_threshold; | ||
| 164 | void mce_intel_feature_init(struct cpuinfo_x86 *c); | 165 | void mce_intel_feature_init(struct cpuinfo_x86 *c); |
| 165 | void cmci_clear(void); | 166 | void cmci_clear(void); |
| 166 | void cmci_reenable(void); | 167 | void cmci_reenable(void); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index c311122ea838..29e87d3b2843 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
| @@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly; | |||
| 83 | int mce_cmci_disabled __read_mostly; | 83 | int mce_cmci_disabled __read_mostly; |
| 84 | int mce_ignore_ce __read_mostly; | 84 | int mce_ignore_ce __read_mostly; |
| 85 | int mce_ser __read_mostly; | 85 | int mce_ser __read_mostly; |
| 86 | int mce_bios_cmci_threshold __read_mostly; | ||
| 86 | 87 | ||
| 87 | struct mce_bank *mce_banks __read_mostly; | 88 | struct mce_bank *mce_banks __read_mostly; |
| 88 | 89 | ||
| @@ -1946,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = { | |||
| 1946 | * check, or 0 to not wait | 1947 | * check, or 0 to not wait |
| 1947 | * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. | 1948 | * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. |
| 1948 | * mce=nobootlog Don't log MCEs from before booting. | 1949 | * mce=nobootlog Don't log MCEs from before booting. |
| 1950 | * mce=bios_cmci_threshold Don't program the CMCI threshold | ||
| 1949 | */ | 1951 | */ |
| 1950 | static int __init mcheck_enable(char *str) | 1952 | static int __init mcheck_enable(char *str) |
| 1951 | { | 1953 | { |
| @@ -1965,6 +1967,8 @@ static int __init mcheck_enable(char *str) | |||
| 1965 | mce_ignore_ce = 1; | 1967 | mce_ignore_ce = 1; |
| 1966 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) | 1968 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) |
| 1967 | mce_bootlog = (str[0] == 'b'); | 1969 | mce_bootlog = (str[0] == 'b'); |
| 1970 | else if (!strcmp(str, "bios_cmci_threshold")) | ||
| 1971 | mce_bios_cmci_threshold = 1; | ||
| 1968 | else if (isdigit(str[0])) { | 1972 | else if (isdigit(str[0])) { |
| 1969 | get_option(&str, &tolerant); | 1973 | get_option(&str, &tolerant); |
| 1970 | if (*str == ',') { | 1974 | if (*str == ',') { |
| @@ -2205,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = { | |||
| 2205 | &mce_cmci_disabled | 2209 | &mce_cmci_disabled |
| 2206 | }; | 2210 | }; |
| 2207 | 2211 | ||
| 2212 | static struct dev_ext_attribute dev_attr_bios_cmci_threshold = { | ||
| 2213 | __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL), | ||
| 2214 | &mce_bios_cmci_threshold | ||
| 2215 | }; | ||
| 2216 | |||
| 2208 | static struct device_attribute *mce_device_attrs[] = { | 2217 | static struct device_attribute *mce_device_attrs[] = { |
| 2209 | &dev_attr_tolerant.attr, | 2218 | &dev_attr_tolerant.attr, |
| 2210 | &dev_attr_check_interval.attr, | 2219 | &dev_attr_check_interval.attr, |
| @@ -2213,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = { | |||
| 2213 | &dev_attr_dont_log_ce.attr, | 2222 | &dev_attr_dont_log_ce.attr, |
| 2214 | &dev_attr_ignore_ce.attr, | 2223 | &dev_attr_ignore_ce.attr, |
| 2215 | &dev_attr_cmci_disabled.attr, | 2224 | &dev_attr_cmci_disabled.attr, |
| 2225 | &dev_attr_bios_cmci_threshold.attr, | ||
| 2216 | NULL | 2226 | NULL |
| 2217 | }; | 2227 | }; |
| 2218 | 2228 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 098386fed48e..5f88abf07e9c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
| @@ -181,10 +181,12 @@ static void cmci_discover(int banks) | |||
| 181 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); | 181 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); |
| 182 | unsigned long flags; | 182 | unsigned long flags; |
| 183 | int i; | 183 | int i; |
| 184 | int bios_wrong_thresh = 0; | ||
| 184 | 185 | ||
| 185 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); | 186 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); |
| 186 | for (i = 0; i < banks; i++) { | 187 | for (i = 0; i < banks; i++) { |
| 187 | u64 val; | 188 | u64 val; |
| 189 | int bios_zero_thresh = 0; | ||
| 188 | 190 | ||
| 189 | if (test_bit(i, owned)) | 191 | if (test_bit(i, owned)) |
| 190 | continue; | 192 | continue; |
| @@ -198,8 +200,20 @@ static void cmci_discover(int banks) | |||
| 198 | continue; | 200 | continue; |
| 199 | } | 201 | } |
| 200 | 202 | ||
| 201 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; | 203 | if (!mce_bios_cmci_threshold) { |
| 202 | val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; | 204 | val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; |
| 205 | val |= CMCI_THRESHOLD; | ||
| 206 | } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { | ||
| 207 | /* | ||
| 208 | * If bios_cmci_threshold boot option was specified | ||
| 209 | * but the threshold is zero, we'll try to initialize | ||
| 210 | * it to 1. | ||
| 211 | */ | ||
| 212 | bios_zero_thresh = 1; | ||
| 213 | val |= CMCI_THRESHOLD; | ||
| 214 | } | ||
| 215 | |||
| 216 | val |= MCI_CTL2_CMCI_EN; | ||
| 203 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 217 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
| 204 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 218 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
| 205 | 219 | ||
| @@ -207,11 +221,26 @@ static void cmci_discover(int banks) | |||
| 207 | if (val & MCI_CTL2_CMCI_EN) { | 221 | if (val & MCI_CTL2_CMCI_EN) { |
| 208 | set_bit(i, owned); | 222 | set_bit(i, owned); |
| 209 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 223 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
| 224 | /* | ||
| 225 | * We are able to set thresholds for some banks that | ||
| 226 | * had a threshold of 0. This means the BIOS has not | ||
| 227 | * set the thresholds properly or does not work with | ||
| 228 | * this boot option. Note down now and report later. | ||
| 229 | */ | ||
| 230 | if (mce_bios_cmci_threshold && bios_zero_thresh && | ||
| 231 | (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) | ||
| 232 | bios_wrong_thresh = 1; | ||
| 210 | } else { | 233 | } else { |
| 211 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | 234 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); |
| 212 | } | 235 | } |
| 213 | } | 236 | } |
| 214 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | 237 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); |
| 238 | if (mce_bios_cmci_threshold && bios_wrong_thresh) { | ||
| 239 | pr_info_once( | ||
| 240 | "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); | ||
| 241 | pr_info_once( | ||
| 242 | "bios_cmci_threshold: Make sure your BIOS supports this boot option\n"); | ||
| 243 | } | ||
| 215 | } | 244 | } |
| 216 | 245 | ||
| 217 | /* | 246 | /* |
| @@ -249,7 +278,7 @@ void cmci_clear(void) | |||
| 249 | continue; | 278 | continue; |
| 250 | /* Disable CMCI */ | 279 | /* Disable CMCI */ |
| 251 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); | 280 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
| 252 | val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); | 281 | val &= ~MCI_CTL2_CMCI_EN; |
| 253 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 282 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
| 254 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 283 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
| 255 | } | 284 | } |
