about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Documentation/acpi/apei/einj.txt 19
-rw-r--r-- Documentation/kernel-parameters.txt 8
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-apei.c 14
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce.c 12
-rw-r--r-- drivers/acpi/acpi_extlog.c 18
-rw-r--r-- drivers/acpi/apei/apei-base.c 4
-rw-r--r-- drivers/acpi/apei/einj.c 58
-rw-r--r-- drivers/acpi/apei/erst.c 2
-rw-r--r-- drivers/acpi/apei/ghes.c 39
-rw-r--r-- drivers/edac/edac_stub.c 19
-rw-r--r-- drivers/edac/sb_edac.c 6
-rw-r--r-- include/linux/edac.h 28
-rw-r--r-- include/trace/events/ras.h 10
13 files changed, 183 insertions, 54 deletions
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt
index a58b63da1a36..f51861bcb07b 100644
--- a/Documentation/acpi/apei/einj.txt
+++ b/Documentation/acpi/apei/einj.txt
@@ -45,11 +45,22 @@ directory apei/einj. The following files are provided.
45 injection. Before this, please specify all necessary error 45 injection. Before this, please specify all necessary error
46 parameters. 46 parameters.
47 47
48- flags
49 Present for kernel version 3.13 and above. Used to specify which
50 of param{1..4} are valid and should be used by BIOS during injection.
51 Value is a bitmask as specified in ACPI5.0 spec for the
52 SET_ERROR_TYPE_WITH_ADDRESS data structure:
53 Bit 0 - Processor APIC field valid (see param3 below)
54 Bit 1 - Memory address and mask valid (param1 and param2)
55 Bit 2 - PCIe (seg,bus,dev,fn) valid (param4 below)
56 If set to zero, legacy behaviour is used where the type of injection
57 specifies just one bit set, and param1 is multiplexed.
58
48- param1 59- param1
49 This file is used to set the first error parameter value. Effect of 60 This file is used to set the first error parameter value. Effect of
50 parameter depends on error_type specified. For example, if error 61 parameter depends on error_type specified. For example, if error
51 type is memory related type, the param1 should be a valid physical 62 type is memory related type, the param1 should be a valid physical
52 memory address. 63 memory address. [Unless "flags" is set - see above]
53 64
54- param2 65- param2
55 This file is used to set the second error parameter value. Effect of 66 This file is used to set the second error parameter value. Effect of
@@ -58,6 +69,12 @@ directory apei/einj. The following files are provided.
58 address mask. Linux requires page or narrower granularity, say, 69 address mask. Linux requires page or narrower granularity, say,
59 0xfffffffffffff000. 70 0xfffffffffffff000.
60 71
72- param3
73 Used when the 0x1 bit is set in "flags" to specify the APIC id.
74
75- param4
76 Used when the 0x4 bit is set in "flags" to specify the target PCIe device.
77
61- notrigger 78- notrigger
62 The EINJ mechanism is a two step process. First inject the error, then 79 The EINJ mechanism is a two step process. First inject the error, then
63 perform some actions to trigger it. Setting "notrigger" to 1 skips the 80 perform some actions to trigger it. Setting "notrigger" to 1 skips the
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4eb5fff022b4..092cfd139065 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -890,6 +890,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
890 890
891 The xen output can only be used by Xen PV guests. 891 The xen output can only be used by Xen PV guests.
892 892
893 edac_report= [HW,EDAC] Control how to report EDAC event
894 Format: {"on" | "off" | "force"}
895 on: enable EDAC to report H/W event. May be overridden
896 by other higher priority error reporting module.
897 off: disable H/W event reporting through EDAC.
898 force: enforce the use of EDAC to report H/W event.
899 default: on.
900
893 ekgdboc= [X86,KGDB] Allow early kernel console debugging 901 ekgdboc= [X86,KGDB] Allow early kernel console debugging
894 ekgdboc=kbd 902 ekgdboc=kbd
895 903
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index de8b60a53f69..a1aef9533154 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -33,22 +33,28 @@
33#include <linux/acpi.h> 33#include <linux/acpi.h>
34#include <linux/cper.h> 34#include <linux/cper.h>
35#include <acpi/apei.h> 35#include <acpi/apei.h>
36#include <acpi/ghes.h>
36#include <asm/mce.h> 37#include <asm/mce.h>
37 38
38#include "mce-internal.h" 39#include "mce-internal.h"
39 40
40void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) 41void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
41{ 42{
42 struct mce m; 43 struct mce m;
43 44
44 /* Only corrected MC is reported */ 45 if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
45 if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA))
46 return; 46 return;
47 47
48 mce_setup(&m); 48 mce_setup(&m);
49 m.bank = 1; 49 m.bank = 1;
50 /* Fake a memory read corrected error with unknown channel */ 50 /* Fake a memory read error with unknown channel */
51 m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; 51 m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
52
53 if (severity >= GHES_SEV_RECOVERABLE)
54 m.status |= MCI_STATUS_UC;
55 if (severity >= GHES_SEV_PANIC)
56 m.status |= MCI_STATUS_PCC;
57
52 m.addr = mem_err->physical_addr; 58 m.addr = mem_err->physical_addr;
53 mce_log(&m); 59 mce_log(&m);
54 mce_notify_irq(); 60 mce_notify_irq();
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b3218cdee95f..4d5419b249da 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1638,15 +1638,15 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1638 1638
1639static void mce_start_timer(unsigned int cpu, struct timer_list *t) 1639static void mce_start_timer(unsigned int cpu, struct timer_list *t)
1640{ 1640{
1641 unsigned long iv = mce_adjust_timer(check_interval * HZ); 1641 unsigned long iv = check_interval * HZ;
1642
1643 __this_cpu_write(mce_next_interval, iv);
1644 1642
1645 if (mca_cfg.ignore_ce || !iv) 1643 if (mca_cfg.ignore_ce || !iv)
1646 return; 1644 return;
1647 1645
1646 per_cpu(mce_next_interval, cpu) = iv;
1647
1648 t->expires = round_jiffies(jiffies + iv); 1648 t->expires = round_jiffies(jiffies + iv);
1649 add_timer_on(t, smp_processor_id()); 1649 add_timer_on(t, cpu);
1650} 1650}
1651 1651
1652static void __mcheck_cpu_init_timer(void) 1652static void __mcheck_cpu_init_timer(void)
@@ -2272,8 +2272,10 @@ static int mce_device_create(unsigned int cpu)
2272 dev->release = &mce_device_release; 2272 dev->release = &mce_device_release;
2273 2273
2274 err = device_register(dev); 2274 err = device_register(dev);
2275 if (err) 2275 if (err) {
2276 put_device(dev);
2276 return err; 2277 return err;
2278 }
2277 2279
2278 for (i = 0; mce_device_attrs[i]; i++) { 2280 for (i = 0; mce_device_attrs[i]; i++) {
2279 err = device_create_file(dev, mce_device_attrs[i]); 2281 err = device_create_file(dev, mce_device_attrs[i]);
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index a6869e110ce5..5d33c5415405 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -12,6 +12,7 @@
12#include <acpi/acpi_bus.h> 12#include <acpi/acpi_bus.h>
13#include <linux/cper.h> 13#include <linux/cper.h>
14#include <linux/ratelimit.h> 14#include <linux/ratelimit.h>
15#include <linux/edac.h>
15#include <asm/cpu.h> 16#include <asm/cpu.h>
16#include <asm/mce.h> 17#include <asm/mce.h>
17 18
@@ -43,6 +44,8 @@ struct extlog_l1_head {
43 u8 rev1[12]; 44 u8 rev1[12];
44}; 45};
45 46
47static int old_edac_report_status;
48
46static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295"; 49static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295";
47 50
48/* L1 table related physical address */ 51/* L1 table related physical address */
@@ -150,7 +153,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
150 153
151 rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); 154 rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
152 155
153 return NOTIFY_DONE; 156 return NOTIFY_STOP;
154} 157}
155 158
156static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret) 159static int extlog_get_dsm(acpi_handle handle, int rev, int func, u64 *ret)
@@ -231,8 +234,12 @@ static int __init extlog_init(void)
231 u64 cap; 234 u64 cap;
232 int rc; 235 int rc;
233 236
234 rc = -ENODEV; 237 if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
238 pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
239 return -EPERM;
240 }
235 241
242 rc = -ENODEV;
236 rdmsrl(MSR_IA32_MCG_CAP, cap); 243 rdmsrl(MSR_IA32_MCG_CAP, cap);
237 if (!(cap & MCG_ELOG_P)) 244 if (!(cap & MCG_ELOG_P))
238 return rc; 245 return rc;
@@ -287,6 +294,12 @@ static int __init extlog_init(void)
287 if (elog_buf == NULL) 294 if (elog_buf == NULL)
288 goto err_release_elog; 295 goto err_release_elog;
289 296
297 /*
298 * eMCA event report method has higher priority than EDAC method,
299 * unless EDAC event report method is mandatory.
300 */
301 old_edac_report_status = get_edac_report_status();
302 set_edac_report_status(EDAC_REPORTING_DISABLED);
290 mce_register_decode_chain(&extlog_mce_dec); 303 mce_register_decode_chain(&extlog_mce_dec);
291 /* enable OS to be involved to take over management from BIOS */ 304 /* enable OS to be involved to take over management from BIOS */
292 ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; 305 ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN;
@@ -308,6 +321,7 @@ err:
308 321
309static void __exit extlog_exit(void) 322static void __exit extlog_exit(void)
310{ 323{
324 set_edac_report_status(old_edac_report_status);
311 mce_unregister_decode_chain(&extlog_mce_dec); 325 mce_unregister_decode_chain(&extlog_mce_dec);
312 ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; 326 ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
313 if (extlog_l1_addr) 327 if (extlog_l1_addr)
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 6d2c49b86b7f..e55584a072c6 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -41,6 +41,7 @@
41#include <linux/rculist.h> 41#include <linux/rculist.h>
42#include <linux/interrupt.h> 42#include <linux/interrupt.h>
43#include <linux/debugfs.h> 43#include <linux/debugfs.h>
44#include <asm/unaligned.h>
44 45
45#include "apei-internal.h" 46#include "apei-internal.h"
46 47
@@ -567,8 +568,7 @@ static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr,
567 bit_offset = reg->bit_offset; 568 bit_offset = reg->bit_offset;
568 access_size_code = reg->access_width; 569 access_size_code = reg->access_width;
569 space_id = reg->space_id; 570 space_id = reg->space_id;
570 /* Handle possible alignment issues */ 571 *paddr = get_unaligned(&reg->address);
571 memcpy(paddr, &reg->address, sizeof(*paddr));
572 if (!*paddr) { 572 if (!*paddr) {
573 pr_warning(FW_BUG APEI_PFX 573 pr_warning(FW_BUG APEI_PFX
574 "Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n", 574 "Invalid physical address in GAR [0x%llx/%u/%u/%u/%u]\n",
diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index fb57d03e698b..7dcc8a824aae 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -34,6 +34,7 @@
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/mm.h> 35#include <linux/mm.h>
36#include <acpi/acpi.h> 36#include <acpi/acpi.h>
37#include <asm/unaligned.h>
37 38
38#include "apei-internal.h" 39#include "apei-internal.h"
39 40
@@ -216,7 +217,7 @@ static void check_vendor_extension(u64 paddr,
216static void *einj_get_parameter_address(void) 217static void *einj_get_parameter_address(void)
217{ 218{
218 int i; 219 int i;
219 u64 paddrv4 = 0, paddrv5 = 0; 220 u64 pa_v4 = 0, pa_v5 = 0;
220 struct acpi_whea_header *entry; 221 struct acpi_whea_header *entry;
221 222
222 entry = EINJ_TAB_ENTRY(einj_tab); 223 entry = EINJ_TAB_ENTRY(einj_tab);
@@ -225,30 +226,28 @@ static void *einj_get_parameter_address(void)
225 entry->instruction == ACPI_EINJ_WRITE_REGISTER && 226 entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
226 entry->register_region.space_id == 227 entry->register_region.space_id ==
227 ACPI_ADR_SPACE_SYSTEM_MEMORY) 228 ACPI_ADR_SPACE_SYSTEM_MEMORY)
228 memcpy(&paddrv4, &entry->register_region.address, 229 pa_v4 = get_unaligned(&entry->register_region.address);
229 sizeof(paddrv4));
230 if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS && 230 if (entry->action == ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS &&
231 entry->instruction == ACPI_EINJ_WRITE_REGISTER && 231 entry->instruction == ACPI_EINJ_WRITE_REGISTER &&
232 entry->register_region.space_id == 232 entry->register_region.space_id ==
233 ACPI_ADR_SPACE_SYSTEM_MEMORY) 233 ACPI_ADR_SPACE_SYSTEM_MEMORY)
234 memcpy(&paddrv5, &entry->register_region.address, 234 pa_v5 = get_unaligned(&entry->register_region.address);
235 sizeof(paddrv5));
236 entry++; 235 entry++;
237 } 236 }
238 if (paddrv5) { 237 if (pa_v5) {
239 struct set_error_type_with_address *v5param; 238 struct set_error_type_with_address *v5param;
240 239
241 v5param = acpi_os_map_memory(paddrv5, sizeof(*v5param)); 240 v5param = acpi_os_map_memory(pa_v5, sizeof(*v5param));
242 if (v5param) { 241 if (v5param) {
243 acpi5 = 1; 242 acpi5 = 1;
244 check_vendor_extension(paddrv5, v5param); 243 check_vendor_extension(pa_v5, v5param);
245 return v5param; 244 return v5param;
246 } 245 }
247 } 246 }
248 if (param_extension && paddrv4) { 247 if (param_extension && pa_v4) {
249 struct einj_parameter *v4param; 248 struct einj_parameter *v4param;
250 249
251 v4param = acpi_os_map_memory(paddrv4, sizeof(*v4param)); 250 v4param = acpi_os_map_memory(pa_v4, sizeof(*v4param));
252 if (!v4param) 251 if (!v4param)
253 return NULL; 252 return NULL;
254 if (v4param->reserved1 || v4param->reserved2) { 253 if (v4param->reserved1 || v4param->reserved2) {
@@ -416,7 +415,8 @@ out:
416 return rc; 415 return rc;
417} 416}
418 417
419static int __einj_error_inject(u32 type, u64 param1, u64 param2) 418static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
419 u64 param3, u64 param4)
420{ 420{
421 struct apei_exec_context ctx; 421 struct apei_exec_context ctx;
422 u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT; 422 u64 val, trigger_paddr, timeout = FIRMWARE_TIMEOUT;
@@ -446,6 +446,12 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
446 break; 446 break;
447 } 447 }
448 v5param->flags = vendor_flags; 448 v5param->flags = vendor_flags;
449 } else if (flags) {
450 v5param->flags = flags;
451 v5param->memory_address = param1;
452 v5param->memory_address_range = param2;
453 v5param->apicid = param3;
454 v5param->pcie_sbdf = param4;
449 } else { 455 } else {
450 switch (type) { 456 switch (type) {
451 case ACPI_EINJ_PROCESSOR_CORRECTABLE: 457 case ACPI_EINJ_PROCESSOR_CORRECTABLE:
@@ -514,11 +520,17 @@ static int __einj_error_inject(u32 type, u64 param1, u64 param2)
514} 520}
515 521
516/* Inject the specified hardware error */ 522/* Inject the specified hardware error */
517static int einj_error_inject(u32 type, u64 param1, u64 param2) 523static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
524 u64 param3, u64 param4)
518{ 525{
519 int rc; 526 int rc;
520 unsigned long pfn; 527 unsigned long pfn;
521 528
529 /* If user manually set "flags", make sure it is legal */
530 if (flags && (flags &
531 ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
532 return -EINVAL;
533
522 /* 534 /*
523 * We need extra sanity checks for memory errors. 535 * We need extra sanity checks for memory errors.
524 * Other types leap directly to injection. 536 * Other types leap directly to injection.
@@ -532,7 +544,7 @@ static int einj_error_inject(u32 type, u64 param1, u64 param2)
532 if (type & ACPI5_VENDOR_BIT) { 544 if (type & ACPI5_VENDOR_BIT) {
533 if (vendor_flags != SETWA_FLAGS_MEM) 545 if (vendor_flags != SETWA_FLAGS_MEM)
534 goto inject; 546 goto inject;
535 } else if (!(type & MEM_ERROR_MASK)) 547 } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
536 goto inject; 548 goto inject;
537 549
538 /* 550 /*
@@ -546,15 +558,18 @@ static int einj_error_inject(u32 type, u64 param1, u64 param2)
546 558
547inject: 559inject:
548 mutex_lock(&einj_mutex); 560 mutex_lock(&einj_mutex);
549 rc = __einj_error_inject(type, param1, param2); 561 rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
550 mutex_unlock(&einj_mutex); 562 mutex_unlock(&einj_mutex);
551 563
552 return rc; 564 return rc;
553} 565}
554 566
555static u32 error_type; 567static u32 error_type;
568static u32 error_flags;
556static u64 error_param1; 569static u64 error_param1;
557static u64 error_param2; 570static u64 error_param2;
571static u64 error_param3;
572static u64 error_param4;
558static struct dentry *einj_debug_dir; 573static struct dentry *einj_debug_dir;
559 574
560static int available_error_type_show(struct seq_file *m, void *v) 575static int available_error_type_show(struct seq_file *m, void *v)
@@ -648,7 +663,8 @@ static int error_inject_set(void *data, u64 val)
648 if (!error_type) 663 if (!error_type)
649 return -EINVAL; 664 return -EINVAL;
650 665
651 return einj_error_inject(error_type, error_param1, error_param2); 666 return einj_error_inject(error_type, error_flags, error_param1, error_param2,
667 error_param3, error_param4);
652} 668}
653 669
654DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL, 670DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL,
@@ -729,6 +745,10 @@ static int __init einj_init(void)
729 rc = -ENOMEM; 745 rc = -ENOMEM;
730 einj_param = einj_get_parameter_address(); 746 einj_param = einj_get_parameter_address();
731 if ((param_extension || acpi5) && einj_param) { 747 if ((param_extension || acpi5) && einj_param) {
748 fentry = debugfs_create_x32("flags", S_IRUSR | S_IWUSR,
749 einj_debug_dir, &error_flags);
750 if (!fentry)
751 goto err_unmap;
732 fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR, 752 fentry = debugfs_create_x64("param1", S_IRUSR | S_IWUSR,
733 einj_debug_dir, &error_param1); 753 einj_debug_dir, &error_param1);
734 if (!fentry) 754 if (!fentry)
@@ -737,6 +757,14 @@ static int __init einj_init(void)
737 einj_debug_dir, &error_param2); 757 einj_debug_dir, &error_param2);
738 if (!fentry) 758 if (!fentry)
739 goto err_unmap; 759 goto err_unmap;
760 fentry = debugfs_create_x64("param3", S_IRUSR | S_IWUSR,
761 einj_debug_dir, &error_param3);
762 if (!fentry)
763 goto err_unmap;
764 fentry = debugfs_create_x64("param4", S_IRUSR | S_IWUSR,
765 einj_debug_dir, &error_param4);
766 if (!fentry)
767 goto err_unmap;
740 768
741 fentry = debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR, 769 fentry = debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR,
742 einj_debug_dir, &notrigger); 770 einj_debug_dir, &notrigger);
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index cb1d557fc22c..ed65e9c4b5b0 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -611,7 +611,7 @@ static void __erst_record_id_cache_compact(void)
611 if (entries[i] == APEI_ERST_INVALID_RECORD_ID) 611 if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
612 continue; 612 continue;
613 if (wpos != i) 613 if (wpos != i)
614 memcpy(&entries[wpos], &entries[i], sizeof(entries[i])); 614 entries[wpos] = entries[i];
615 wpos++; 615 wpos++;
616 } 616 }
617 erst_record_id_cache.len = wpos; 617 erst_record_id_cache.len = wpos;
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index a30bc313787b..46766ef7ef5d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -413,27 +413,31 @@ static void ghes_handle_memory_failure(struct acpi_generic_data *gdata, int sev)
413{ 413{
414#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE 414#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
415 unsigned long pfn; 415 unsigned long pfn;
416 int flags = -1;
416 int sec_sev = ghes_severity(gdata->error_severity); 417 int sec_sev = ghes_severity(gdata->error_severity);
417 struct cper_sec_mem_err *mem_err; 418 struct cper_sec_mem_err *mem_err;
418 mem_err = (struct cper_sec_mem_err *)(gdata + 1); 419 mem_err = (struct cper_sec_mem_err *)(gdata + 1);
419 420
420 if (sec_sev == GHES_SEV_CORRECTED && 421 if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
421 (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) && 422 return;
422 (mem_err->validation_bits & CPER_MEM_VALID_PA)) { 423
423 pfn = mem_err->physical_addr >> PAGE_SHIFT; 424 pfn = mem_err->physical_addr >> PAGE_SHIFT;
424 if (pfn_valid(pfn)) 425 if (!pfn_valid(pfn)) {
425 memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE); 426 pr_warn_ratelimited(FW_WARN GHES_PFX
426 else if (printk_ratelimit()) 427 "Invalid address in generic error data: %#llx\n",
427 pr_warn(FW_WARN GHES_PFX 428 mem_err->physical_addr);
428 "Invalid address in generic error data: %#llx\n", 429 return;
429 mem_err->physical_addr);
430 }
431 if (sev == GHES_SEV_RECOVERABLE &&
432 sec_sev == GHES_SEV_RECOVERABLE &&
433 mem_err->validation_bits & CPER_MEM_VALID_PA) {
434 pfn = mem_err->physical_addr >> PAGE_SHIFT;
435 memory_failure_queue(pfn, 0, 0);
436 } 430 }
431
432 /* iff following two events can be handled properly by now */
433 if (sec_sev == GHES_SEV_CORRECTED &&
434 (gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
435 flags = MF_SOFT_OFFLINE;
436 if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
437 flags = 0;
438
439 if (flags != -1)
440 memory_failure_queue(pfn, 0, flags);
437#endif 441#endif
438} 442}
439 443
@@ -453,8 +457,7 @@ static void ghes_do_proc(struct ghes *ghes,
453 ghes_edac_report_mem_error(ghes, sev, mem_err); 457 ghes_edac_report_mem_error(ghes, sev, mem_err);
454 458
455#ifdef CONFIG_X86_MCE 459#ifdef CONFIG_X86_MCE
456 apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, 460 apei_mce_report_mem_error(sev, mem_err);
457 mem_err);
458#endif 461#endif
459 ghes_handle_memory_failure(gdata, sev); 462 ghes_handle_memory_failure(gdata, sev);
460 } 463 }
diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c
index 351945fa2ecd..9d9e18aefaaa 100644
--- a/drivers/edac/edac_stub.c
+++ b/drivers/edac/edac_stub.c
@@ -29,6 +29,25 @@ EXPORT_SYMBOL_GPL(edac_err_assert);
29 29
30static atomic_t edac_subsys_valid = ATOMIC_INIT(0); 30static atomic_t edac_subsys_valid = ATOMIC_INIT(0);
31 31
32int edac_report_status = EDAC_REPORTING_ENABLED;
33EXPORT_SYMBOL_GPL(edac_report_status);
34
35static int __init edac_report_setup(char *str)
36{
37 if (!str)
38 return -EINVAL;
39
40 if (!strncmp(str, "on", 2))
41 set_edac_report_status(EDAC_REPORTING_ENABLED);
42 else if (!strncmp(str, "off", 3))
43 set_edac_report_status(EDAC_REPORTING_DISABLED);
44 else if (!strncmp(str, "force", 5))
45 set_edac_report_status(EDAC_REPORTING_FORCE);
46
47 return 0;
48}
49__setup("edac_report=", edac_report_setup);
50
32/* 51/*
33 * called to determine if there is an EDAC driver interested in 52 * called to determine if there is an EDAC driver interested in
34 * knowing an event (such as NMI) occurred 53 * knowing an event (such as NMI) occurred
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index de988c8da1c8..54e2abe671f7 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1829,6 +1829,9 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
1829 struct mem_ctl_info *mci; 1829 struct mem_ctl_info *mci;
1830 struct sbridge_pvt *pvt; 1830 struct sbridge_pvt *pvt;
1831 1831
1832 if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
1833 return NOTIFY_DONE;
1834
1832 mci = get_mci_for_node_id(mce->socketid); 1835 mci = get_mci_for_node_id(mce->socketid);
1833 if (!mci) 1836 if (!mci)
1834 return NOTIFY_BAD; 1837 return NOTIFY_BAD;
@@ -2142,9 +2145,10 @@ static int __init sbridge_init(void)
2142 opstate_init(); 2145 opstate_init();
2143 2146
2144 pci_rc = pci_register_driver(&sbridge_driver); 2147 pci_rc = pci_register_driver(&sbridge_driver);
2145
2146 if (pci_rc >= 0) { 2148 if (pci_rc >= 0) {
2147 mce_register_decode_chain(&sbridge_mce_dec); 2149 mce_register_decode_chain(&sbridge_mce_dec);
2150 if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
2151 sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
2148 return 0; 2152 return 0;
2149 } 2153 }
2150 2154
diff --git a/include/linux/edac.h b/include/linux/edac.h
index dbdffe8d4469..8e6c20af11a2 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -35,6 +35,34 @@ extern void edac_atomic_assert_error(void);
35extern struct bus_type *edac_get_sysfs_subsys(void); 35extern struct bus_type *edac_get_sysfs_subsys(void);
36extern void edac_put_sysfs_subsys(void); 36extern void edac_put_sysfs_subsys(void);
37 37
38enum {
39 EDAC_REPORTING_ENABLED,
40 EDAC_REPORTING_DISABLED,
41 EDAC_REPORTING_FORCE
42};
43
44extern int edac_report_status;
45#ifdef CONFIG_EDAC
46static inline int get_edac_report_status(void)
47{
48 return edac_report_status;
49}
50
51static inline void set_edac_report_status(int new)
52{
53 edac_report_status = new;
54}
55#else
56static inline int get_edac_report_status(void)
57{
58 return EDAC_REPORTING_DISABLED;
59}
60
61static inline void set_edac_report_status(int new)
62{
63}
64#endif
65
38static inline void opstate_init(void) 66static inline void opstate_init(void)
39{ 67{
40 switch (edac_op_state) { 68 switch (edac_op_state) {
diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
index 88b878383797..1c875ad1ee5f 100644
--- a/include/trace/events/ras.h
+++ b/include/trace/events/ras.h
@@ -5,7 +5,7 @@
5#define _TRACE_AER_H 5#define _TRACE_AER_H
6 6
7#include <linux/tracepoint.h> 7#include <linux/tracepoint.h>
8#include <linux/edac.h> 8#include <linux/aer.h>
9 9
10 10
11/* 11/*
@@ -63,10 +63,10 @@ TRACE_EVENT(aer_event,
63 63
64 TP_printk("%s PCIe Bus Error: severity=%s, %s\n", 64 TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
65 __get_str(dev_name), 65 __get_str(dev_name),
66 __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" : 66 __entry->severity == AER_CORRECTABLE ? "Corrected" :
67 __entry->severity == HW_EVENT_ERR_FATAL ? 67 __entry->severity == AER_FATAL ?
68 "Fatal" : "Uncorrected", 68 "Fatal" : "Uncorrected, non-fatal",
69 __entry->severity == HW_EVENT_ERR_CORRECTED ? 69 __entry->severity == AER_CORRECTABLE ?
70 __print_flags(__entry->status, "|", aer_correctable_errors) : 70 __print_flags(__entry->status, "|", aer_correctable_errors) :
71 __print_flags(__entry->status, "|", aer_uncorrectable_errors)) 71 __print_flags(__entry->status, "|", aer_uncorrectable_errors))
72); 72);