about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Documentation/x86/x86_64/boot-options.txt 5
-rw-r--r-- arch/x86/include/asm/acpi.h 2
-rw-r--r-- arch/x86/include/asm/mce.h 3
-rw-r--r-- arch/x86/kernel/acpi/boot.c 5
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-internal.h 3
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 28
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce_intel.c 42
-rw-r--r-- drivers/acpi/apei/ghes.c 38
-rw-r--r-- drivers/acpi/apei/hest.c 38
-rw-r--r-- include/linux/mm.h 1
-rw-r--r-- mm/memory-failure.c 5
11 files changed, 150 insertions, 20 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index e9e8ddbbf376..1228b22e142b 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -176,6 +176,11 @@ ACPI
176 176
177 acpi=noirq Don't route interrupts 177 acpi=noirq Don't route interrupts
178 178
179 acpi=nocmcff Disable firmware first mode for corrected errors. This
180 disables parsing the HEST CMC error source to check if
181 firmware has set the FF flag. This may result in
182 duplicate corrected error reports.
183
179PCI 184PCI
180 185
181 pci=off Don't use PCI 186 pci=off Don't use PCI
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 2dfac58f3b11..b1977bad5435 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -86,6 +86,7 @@ extern int acpi_pci_disabled;
86extern int acpi_skip_timer_override; 86extern int acpi_skip_timer_override;
87extern int acpi_use_timer_override; 87extern int acpi_use_timer_override;
88extern int acpi_fix_pin2_polarity; 88extern int acpi_fix_pin2_polarity;
89extern int acpi_disable_cmcff;
89 90
90extern u8 acpi_sci_flags; 91extern u8 acpi_sci_flags;
91extern int acpi_sci_override_gsi; 92extern int acpi_sci_override_gsi;
@@ -168,6 +169,7 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
168 169
169#define acpi_lapic 0 170#define acpi_lapic 0
170#define acpi_ioapic 0 171#define acpi_ioapic 0
172#define acpi_disable_cmcff 0
171static inline void acpi_noirq_set(void) { } 173static inline void acpi_noirq_set(void) { }
172static inline void acpi_disable_pci(void) { } 174static inline void acpi_disable_pci(void) { }
173static inline void disable_acpi(void) { } 175static inline void disable_acpi(void) { }
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 29e3093bbd21..163d7a4451f0 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -188,6 +188,9 @@ extern void register_mce_write_callback(ssize_t (*)(struct file *filp,
188 const char __user *ubuf, 188 const char __user *ubuf,
189 size_t usize, loff_t *off)); 189 size_t usize, loff_t *off));
190 190
191/* Disable CMCI/polling for MCA bank claimed by firmware */
192extern void mce_disable_bank(int bank);
193
191/* 194/*
192 * Exception handler 195 * Exception handler
193 */ 196 */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 2627a81253ee..fead83292e6e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -67,6 +67,7 @@ EXPORT_SYMBOL(acpi_pci_disabled);
67int acpi_lapic; 67int acpi_lapic;
68int acpi_ioapic; 68int acpi_ioapic;
69int acpi_strict; 69int acpi_strict;
70int acpi_disable_cmcff;
70 71
71u8 acpi_sci_flags __initdata; 72u8 acpi_sci_flags __initdata;
72int acpi_sci_override_gsi __initdata; 73int acpi_sci_override_gsi __initdata;
@@ -1626,6 +1627,10 @@ static int __init parse_acpi(char *arg)
1626 /* "acpi=copy_dsdt" copys DSDT */ 1627 /* "acpi=copy_dsdt" copys DSDT */
1627 else if (strcmp(arg, "copy_dsdt") == 0) { 1628 else if (strcmp(arg, "copy_dsdt") == 0) {
1628 acpi_gbl_copy_dsdt_locally = 1; 1629 acpi_gbl_copy_dsdt_locally = 1;
1630 }
1631 /* "acpi=nocmcff" disables FF mode for corrected errors */
1632 else if (strcmp(arg, "nocmcff") == 0) {
1633 acpi_disable_cmcff = 1;
1629 } else { 1634 } else {
1630 /* Core will printk when we return error. */ 1635 /* Core will printk when we return error. */
1631 return -EINVAL; 1636 return -EINVAL;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 5b7d4fa5d3b7..09edd0b65fef 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -25,15 +25,18 @@ int mce_severity(struct mce *a, int tolerant, char **msg);
25struct dentry *mce_get_debugfs_dir(void); 25struct dentry *mce_get_debugfs_dir(void);
26 26
27extern struct mce_bank *mce_banks; 27extern struct mce_bank *mce_banks;
28extern mce_banks_t mce_banks_ce_disabled;
28 29
29#ifdef CONFIG_X86_MCE_INTEL 30#ifdef CONFIG_X86_MCE_INTEL
30unsigned long mce_intel_adjust_timer(unsigned long interval); 31unsigned long mce_intel_adjust_timer(unsigned long interval);
31void mce_intel_cmci_poll(void); 32void mce_intel_cmci_poll(void);
32void mce_intel_hcpu_update(unsigned long cpu); 33void mce_intel_hcpu_update(unsigned long cpu);
34void cmci_disable_bank(int bank);
33#else 35#else
34# define mce_intel_adjust_timer mce_adjust_timer_default 36# define mce_intel_adjust_timer mce_adjust_timer_default
35static inline void mce_intel_cmci_poll(void) { } 37static inline void mce_intel_cmci_poll(void) { }
36static inline void mce_intel_hcpu_update(unsigned long cpu) { } 38static inline void mce_intel_hcpu_update(unsigned long cpu) { }
39static inline void cmci_disable_bank(int bank) { }
37#endif 40#endif
38 41
39void mce_timer_kick(unsigned long interval); 42void mce_timer_kick(unsigned long interval);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 87a65c939bcd..b3218cdee95f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -97,6 +97,15 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
97 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 97 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
98}; 98};
99 99
100/*
101 * MCA banks controlled through firmware first for corrected errors.
102 * This is a global list of banks for which we won't enable CMCI and we
103 * won't poll. Firmware controls these banks and is responsible for
104 * reporting corrected errors through GHES. Uncorrected/recoverable
105 * errors are still notified through a machine check.
106 */
107mce_banks_t mce_banks_ce_disabled;
108
100static DEFINE_PER_CPU(struct work_struct, mce_work); 109static DEFINE_PER_CPU(struct work_struct, mce_work);
101 110
102static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); 111static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
@@ -1935,6 +1944,25 @@ static struct miscdevice mce_chrdev_device = {
1935 &mce_chrdev_ops, 1944 &mce_chrdev_ops,
1936}; 1945};
1937 1946
1947static void __mce_disable_bank(void *arg)
1948{
1949 int bank = *((int *)arg);
1950 __clear_bit(bank, __get_cpu_var(mce_poll_banks));
1951 cmci_disable_bank(bank);
1952}
1953
1954void mce_disable_bank(int bank)
1955{
1956 if (bank >= mca_cfg.banks) {
1957 pr_warn(FW_BUG
1958 "Ignoring request to disable invalid MCA bank %d.\n",
1959 bank);
1960 return;
1961 }
1962 set_bit(bank, mce_banks_ce_disabled);
1963 on_each_cpu(__mce_disable_bank, &bank, 1);
1964}
1965
1938/* 1966/*
1939 * mce=off Disables machine check 1967 * mce=off Disables machine check
1940 * mce=no_cmci Disables CMCI 1968 * mce=no_cmci Disables CMCI
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index d56405309dc1..4cfe0458ca66 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -203,6 +203,10 @@ static void cmci_discover(int banks)
203 if (test_bit(i, owned)) 203 if (test_bit(i, owned))
204 continue; 204 continue;
205 205
206 /* Skip banks in firmware first mode */
207 if (test_bit(i, mce_banks_ce_disabled))
208 continue;
209
206 rdmsrl(MSR_IA32_MCx_CTL2(i), val); 210 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
207 211
208 /* Already owned by someone else? */ 212 /* Already owned by someone else? */
@@ -271,6 +275,19 @@ void cmci_recheck(void)
271 local_irq_restore(flags); 275 local_irq_restore(flags);
272} 276}
273 277
278/* Caller must hold the lock on cmci_discover_lock */
279static void __cmci_disable_bank(int bank)
280{
281 u64 val;
282
283 if (!test_bit(bank, __get_cpu_var(mce_banks_owned)))
284 return;
285 rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
286 val &= ~MCI_CTL2_CMCI_EN;
287 wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
288 __clear_bit(bank, __get_cpu_var(mce_banks_owned));
289}
290
274/* 291/*
275 * Disable CMCI on this CPU for all banks it owns when it goes down. 292 * Disable CMCI on this CPU for all banks it owns when it goes down.
276 * This allows other CPUs to claim the banks on rediscovery. 293 * This allows other CPUs to claim the banks on rediscovery.
@@ -280,20 +297,12 @@ void cmci_clear(void)
280 unsigned long flags; 297 unsigned long flags;
281 int i; 298 int i;
282 int banks; 299 int banks;
283 u64 val;
284 300
285 if (!cmci_supported(&banks)) 301 if (!cmci_supported(&banks))
286 return; 302 return;
287 raw_spin_lock_irqsave(&cmci_discover_lock, flags); 303 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
288 for (i = 0; i < banks; i++) { 304 for (i = 0; i < banks; i++)
289 if (!test_bit(i, __get_cpu_var(mce_banks_owned))) 305 __cmci_disable_bank(i);
290 continue;
291 /* Disable CMCI */
292 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
293 val &= ~MCI_CTL2_CMCI_EN;
294 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
295 __clear_bit(i, __get_cpu_var(mce_banks_owned));
296 }
297 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); 306 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
298} 307}
299 308
@@ -327,6 +336,19 @@ void cmci_reenable(void)
327 cmci_discover(banks); 336 cmci_discover(banks);
328} 337}
329 338
339void cmci_disable_bank(int bank)
340{
341 int banks;
342 unsigned long flags;
343
344 if (!cmci_supported(&banks))
345 return;
346
347 raw_spin_lock_irqsave(&cmci_discover_lock, flags);
348 __cmci_disable_bank(bank);
349 raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
350}
351
330static void intel_init_cmci(void) 352static void intel_init_cmci(void)
331{ 353{
332 int banks; 354 int banks;
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ec9b57d428a1..8ec37bbdd699 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -409,6 +409,34 @@ static void ghes_clear_estatus(struct ghes *ghes)
409 ghes->flags &= ~GHES_TO_CLEAR; 409 ghes->flags &= ~GHES_TO_CLEAR;
410} 410}
411 411
412static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
413{
414#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
415 unsigned long pfn;
416 int sec_sev = ghes_severity(gdata->error_severity);
417 struct cper_sec_mem_err *mem_err;
418 mem_err = (struct cper_sec_mem_err *)(gdata + 1);
419
420 if (sec_sev == GHES_SEV_CORRECTED &&
421 (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) &&
422 (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)) {
423 pfn = mem_err->physical_addr >> PAGE_SHIFT;
424 if (pfn_valid(pfn))
425 memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE);
426 else if (printk_ratelimit())
427 pr_warn(FW_WARN GHES_PFX
428 "Invalid address in generic error data: %#llx\n",
429 mem_err->physical_addr);
430 }
431 if (sev == GHES_SEV_RECOVERABLE &&
432 sec_sev == GHES_SEV_RECOVERABLE &&
433 mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
434 pfn = mem_err->physical_addr >> PAGE_SHIFT;
435 memory_failure_queue(pfn, 0, 0);
436 }
437#endif
438}
439
412static void ghes_do_proc(struct ghes *ghes, 440static void ghes_do_proc(struct ghes *ghes,
413 const struct acpi_hest_generic_status *estatus) 441 const struct acpi_hest_generic_status *estatus)
414{ 442{
@@ -428,15 +456,7 @@ static void ghes_do_proc(struct ghes *ghes,
428 apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, 456 apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
429 mem_err); 457 mem_err);
430#endif 458#endif
431#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE 459 ghes_handle_memory_failure(gdata, sev);
432 if (sev == GHES_SEV_RECOVERABLE &&
433 sec_sev == GHES_SEV_RECOVERABLE &&
434 mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
435 unsigned long pfn;
436 pfn = mem_err->physical_addr >> PAGE_SHIFT;
437 memory_failure_queue(pfn, 0, 0);
438 }
439#endif
440 } 460 }
441#ifdef CONFIG_ACPI_APEI_PCIEAER 461#ifdef CONFIG_ACPI_APEI_PCIEAER
442 else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, 462 else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index f5ef5d54e4ac..502024502b13 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -36,6 +36,7 @@
36#include <linux/io.h> 36#include <linux/io.h>
37#include <linux/platform_device.h> 37#include <linux/platform_device.h>
38#include <acpi/apei.h> 38#include <acpi/apei.h>
39#include <asm/mce.h>
39 40
40#include "apei-internal.h" 41#include "apei-internal.h"
41 42
@@ -121,6 +122,40 @@ int apei_hest_parse(apei_hest_func_t func, void *data)
121} 122}
122EXPORT_SYMBOL_GPL(apei_hest_parse); 123EXPORT_SYMBOL_GPL(apei_hest_parse);
123 124
125/*
126 * Check if firmware advertises firmware first mode. We need FF bit to be set
127 * along with a set of MC banks which work in FF mode.
128 */
129static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data)
130{
131 int i;
132 struct acpi_hest_ia_corrected *cmc;
133 struct acpi_hest_ia_error_bank *mc_bank;
134
135 if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
136 return 0;
137
138 cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
139 if (!cmc->enabled)
140 return 0;
141
142 /*
143 * We expect HEST to provide a list of MC banks that report errors
144 * in firmware first mode. Otherwise, return non-zero value to
145 * indicate that we are done parsing HEST.
146 */
147 if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks)
148 return 1;
149
150 pr_info(HEST_PFX "Enabling Firmware First mode for corrected errors.\n");
151
152 mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1);
153 for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
154 mce_disable_bank(mc_bank->bank_number);
155
156 return 1;
157}
158
124struct ghes_arr { 159struct ghes_arr {
125 struct platform_device **ghes_devs; 160 struct platform_device **ghes_devs;
126 unsigned int count; 161 unsigned int count;
@@ -227,6 +262,9 @@ void __init acpi_hest_init(void)
227 goto err; 262 goto err;
228 } 263 }
229 264
265 if (!acpi_disable_cmcff)
266 apei_hest_parse(hest_parse_cmc, NULL);
267
230 if (!ghes_disable) { 268 if (!ghes_disable) {
231 rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count); 269 rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
232 if (rc) 270 if (rc)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f0224608d15e..d2d59b4149d0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1798,6 +1798,7 @@ enum mf_flags {
1798 MF_COUNT_INCREASED = 1 << 0, 1798 MF_COUNT_INCREASED = 1 << 0,
1799 MF_ACTION_REQUIRED = 1 << 1, 1799 MF_ACTION_REQUIRED = 1 << 1,
1800 MF_MUST_KILL = 1 << 2, 1800 MF_MUST_KILL = 1 << 2,
1801 MF_SOFT_OFFLINE = 1 << 3,
1801}; 1802};
1802extern int memory_failure(unsigned long pfn, int trapno, int flags); 1803extern int memory_failure(unsigned long pfn, int trapno, int flags);
1803extern void memory_failure_queue(unsigned long pfn, int trapno, int flags); 1804extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2c13aa7a0164..55d7c8026ab0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1286,7 +1286,10 @@ static void memory_failure_work_func(struct work_struct *work)
1286 spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); 1286 spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
1287 if (!gotten) 1287 if (!gotten)
1288 break; 1288 break;
1289 memory_failure(entry.pfn, entry.trapno, entry.flags); 1289 if (entry.flags & MF_SOFT_OFFLINE)
1290 soft_offline_page(pfn_to_page(entry.pfn), entry.flags);
1291 else
1292 memory_failure(entry.pfn, entry.trapno, entry.flags);
1290 } 1293 }
1291} 1294}
1292 1295