aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-08-04 20:21:59 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-08-04 20:21:59 -0400
commit d782cebd6b39b4caab8a913180c0acfd6c33e9c2 (patch)
tree e8ed959e9475f57bf7f2a0753e5a0f7cf04c8f75 /drivers
parent 8556d44fee6ded9f4287d7ff7b5cc9d8635b0be0 (diff)
parent c3107e3c504d3187ed8eac8179494946faff1481 (diff)
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Ingo Molnar:
 "The main changes in this cycle are:

   - RAS tracing/events infrastructure, by Gong Chen.

   - Various generalizations of the APEI code to make it available to
     non-x86 architectures, by Tomasz Nowicki"

* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/ras: Fix build warnings in <linux/aer.h>
  acpi, apei, ghes: Factor out ioremap virtual memory for IRQ and NMI context.
  acpi, apei, ghes: Make NMI error notification to be GHES architecture extension.
  apei, mce: Factor out APEI architecture specific MCE calls.
  RAS, extlog: Adjust init flow
  trace, eMCA: Add a knob to adjust where to save event log
  trace, RAS: Add eMCA trace event interface
  RAS, debugfs: Add debugfs interface for RAS subsystem
  CPER: Adjust code flow of some functions
  x86, MCE: Robustify mcheck_init_device
  trace, AER: Move trace into unified interface
  trace, RAS: Add basic RAS trace event
  x86, MCE: Kill CPU_POST_DEAD
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/Kconfig 2
-rw-r--r-- drivers/Makefile 1
-rw-r--r-- drivers/acpi/Kconfig 4
-rw-r--r-- drivers/acpi/acpi_extlog.c 46
-rw-r--r-- drivers/acpi/apei/Kconfig 8
-rw-r--r-- drivers/acpi/apei/apei-base.c 13
-rw-r--r-- drivers/acpi/apei/ghes.c 173
-rw-r--r-- drivers/acpi/apei/hest.c 29
-rw-r--r-- drivers/edac/Kconfig 1
-rw-r--r-- drivers/edac/edac_mc.c 3
-rw-r--r-- drivers/firmware/efi/cper.c 192
-rw-r--r-- drivers/pci/pcie/aer/Kconfig 1
-rw-r--r-- drivers/pci/pcie/aer/aerdrv_errprint.c 4
-rw-r--r-- drivers/ras/Kconfig 2
-rw-r--r-- drivers/ras/Makefile 1
-rw-r--r-- drivers/ras/debugfs.c 56
-rw-r--r-- drivers/ras/ras.c 29
17 files changed, 400 insertions, 165 deletions
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 0e87a34b6472..4e6e66c3c8d6 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -176,4 +176,6 @@ source "drivers/powercap/Kconfig"
176 176
177source "drivers/mcb/Kconfig" 177source "drivers/mcb/Kconfig"
178 178
179source "drivers/ras/Kconfig"
180
179endmenu 181endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index f98b50d8251d..65c32b1cea3d 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -158,3 +158,4 @@ obj-$(CONFIG_NTB) += ntb/
158obj-$(CONFIG_FMC) += fmc/ 158obj-$(CONFIG_FMC) += fmc/
159obj-$(CONFIG_POWERCAP) += powercap/ 159obj-$(CONFIG_POWERCAP) += powercap/
160obj-$(CONFIG_MCB) += mcb/ 160obj-$(CONFIG_MCB) += mcb/
161obj-$(CONFIG_RAS) += ras/
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index a34a22841002..206942b8d105 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -370,6 +370,7 @@ config ACPI_EXTLOG
370 tristate "Extended Error Log support" 370 tristate "Extended Error Log support"
371 depends on X86_MCE && X86_LOCAL_APIC 371 depends on X86_MCE && X86_LOCAL_APIC
372 select UEFI_CPER 372 select UEFI_CPER
373 select RAS
373 default n 374 default n
374 help 375 help
375 Certain usages such as Predictive Failure Analysis (PFA) require 376 Certain usages such as Predictive Failure Analysis (PFA) require
@@ -384,6 +385,7 @@ config ACPI_EXTLOG
384 385
385 Enhanced MCA Logging allows firmware to provide additional error 386 Enhanced MCA Logging allows firmware to provide additional error
386 information to system software, synchronous with MCE or CMCI. This 387 information to system software, synchronous with MCE or CMCI. This
387 driver adds support for that functionality. 388 driver adds support for that functionality with corresponding
389 tracepoint which carries that information to userspace.
388 390
389endif # ACPI 391endif # ACPI
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 185334114d71..0ad6f389d922 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -12,10 +12,12 @@
12#include <linux/cper.h> 12#include <linux/cper.h>
13#include <linux/ratelimit.h> 13#include <linux/ratelimit.h>
14#include <linux/edac.h> 14#include <linux/edac.h>
15#include <linux/ras.h>
15#include <asm/cpu.h> 16#include <asm/cpu.h>
16#include <asm/mce.h> 17#include <asm/mce.h>
17 18
18#include "apei/apei-internal.h" 19#include "apei/apei-internal.h"
20#include <ras/ras_event.h>
19 21
20#define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */ 22#define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */
21 23
@@ -137,8 +139,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
137 struct mce *mce = (struct mce *)data; 139 struct mce *mce = (struct mce *)data;
138 int bank = mce->bank; 140 int bank = mce->bank;
139 int cpu = mce->extcpu; 141 int cpu = mce->extcpu;
140 struct acpi_generic_status *estatus; 142 struct acpi_generic_status *estatus, *tmp;
141 int rc; 143 struct acpi_generic_data *gdata;
144 const uuid_le *fru_id = &NULL_UUID_LE;
145 char *fru_text = "";
146 uuid_le *sec_type;
147 static u32 err_seq;
142 148
143 estatus = extlog_elog_entry_check(cpu, bank); 149 estatus = extlog_elog_entry_check(cpu, bank);
144 if (estatus == NULL) 150 if (estatus == NULL)
@@ -148,8 +154,29 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
148 /* clear record status to enable BIOS to update it again */ 154 /* clear record status to enable BIOS to update it again */
149 estatus->block_status = 0; 155 estatus->block_status = 0;
150 156
151 rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); 157 tmp = (struct acpi_generic_status *)elog_buf;
158
159 if (!ras_userspace_consumers()) {
160 print_extlog_rcd(NULL, tmp, cpu);
161 goto out;
162 }
163
164 /* log event via trace */
165 err_seq++;
166 gdata = (struct acpi_generic_data *)(tmp + 1);
167 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
168 fru_id = (uuid_le *)gdata->fru_id;
169 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
170 fru_text = gdata->fru_text;
171 sec_type = (uuid_le *)gdata->section_type;
172 if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
173 struct cper_sec_mem_err *mem = (void *)(gdata + 1);
174 if (gdata->error_data_length >= sizeof(*mem))
175 trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
176 (u8)gdata->error_severity);
177 }
152 178
179out:
153 return NOTIFY_STOP; 180 return NOTIFY_STOP;
154} 181}
155 182
@@ -196,19 +223,16 @@ static int __init extlog_init(void)
196 u64 cap; 223 u64 cap;
197 int rc; 224 int rc;
198 225
226 rdmsrl(MSR_IA32_MCG_CAP, cap);
227
228 if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr())
229 return -ENODEV;
230
199 if (get_edac_report_status() == EDAC_REPORTING_FORCE) { 231 if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
200 pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); 232 pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
201 return -EPERM; 233 return -EPERM;
202 } 234 }
203 235
204 rc = -ENODEV;
205 rdmsrl(MSR_IA32_MCG_CAP, cap);
206 if (!(cap & MCG_ELOG_P))
207 return rc;
208
209 if (!extlog_get_l1addr())
210 return rc;
211
212 rc = -EINVAL; 236 rc = -EINVAL;
213 /* get L1 header to fetch necessary information */ 237 /* get L1 header to fetch necessary information */
214 l1_hdr_size = sizeof(struct extlog_l1_head); 238 l1_hdr_size = sizeof(struct extlog_l1_head);
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index c4dac7150960..b0140c8fc733 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -1,9 +1,15 @@
1config HAVE_ACPI_APEI
2 bool
3
4config HAVE_ACPI_APEI_NMI
5 bool
6
1config ACPI_APEI 7config ACPI_APEI
2 bool "ACPI Platform Error Interface (APEI)" 8 bool "ACPI Platform Error Interface (APEI)"
3 select MISC_FILESYSTEMS 9 select MISC_FILESYSTEMS
4 select PSTORE 10 select PSTORE
5 select UEFI_CPER 11 select UEFI_CPER
6 depends on X86 12 depends on HAVE_ACPI_APEI
7 help 13 help
8 APEI allows to report errors (for example from the chipset) 14 APEI allows to report errors (for example from the chipset)
9 to the operating system. This improves NMI handling 15 to the operating system. This improves NMI handling
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 8678dfe5366b..2cd7bdd6c8b3 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -745,6 +745,19 @@ struct dentry *apei_get_debugfs_dir(void)
745} 745}
746EXPORT_SYMBOL_GPL(apei_get_debugfs_dir); 746EXPORT_SYMBOL_GPL(apei_get_debugfs_dir);
747 747
748int __weak arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr,
749 void *data)
750{
751 return 1;
752}
753EXPORT_SYMBOL_GPL(arch_apei_enable_cmcff);
754
755void __weak arch_apei_report_mem_error(int sev,
756 struct cper_sec_mem_err *mem_err)
757{
758}
759EXPORT_SYMBOL_GPL(arch_apei_report_mem_error);
760
748int apei_osc_setup(void) 761int apei_osc_setup(void)
749{ 762{
750 static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c"; 763 static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index dab7cb7349df..e05d84e7b06d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -47,11 +47,11 @@
47#include <linux/genalloc.h> 47#include <linux/genalloc.h>
48#include <linux/pci.h> 48#include <linux/pci.h>
49#include <linux/aer.h> 49#include <linux/aer.h>
50#include <linux/nmi.h>
50 51
51#include <acpi/ghes.h> 52#include <acpi/ghes.h>
52#include <asm/mce.h> 53#include <acpi/apei.h>
53#include <asm/tlbflush.h> 54#include <asm/tlbflush.h>
54#include <asm/nmi.h>
55 55
56#include "apei-internal.h" 56#include "apei-internal.h"
57 57
@@ -86,8 +86,6 @@
86bool ghes_disable; 86bool ghes_disable;
87module_param_named(disable, ghes_disable, bool, 0); 87module_param_named(disable, ghes_disable, bool, 0);
88 88
89static int ghes_panic_timeout __read_mostly = 30;
90
91/* 89/*
92 * All error sources notified with SCI shares one notifier function, 90 * All error sources notified with SCI shares one notifier function,
93 * so they need to be linked and checked one by one. This is applied 91 * so they need to be linked and checked one by one. This is applied
@@ -97,16 +95,9 @@ static int ghes_panic_timeout __read_mostly = 30;
97 * list changing, not for traversing. 95 * list changing, not for traversing.
98 */ 96 */
99static LIST_HEAD(ghes_sci); 97static LIST_HEAD(ghes_sci);
100static LIST_HEAD(ghes_nmi);
101static DEFINE_MUTEX(ghes_list_mutex); 98static DEFINE_MUTEX(ghes_list_mutex);
102 99
103/* 100/*
104 * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
105 * mutual exclusion.
106 */
107static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
108
109/*
110 * Because the memory area used to transfer hardware error information 101 * Because the memory area used to transfer hardware error information
111 * from BIOS to Linux can be determined only in NMI, IRQ or timer 102 * from BIOS to Linux can be determined only in NMI, IRQ or timer
112 * handler, but general ioremap can not be used in atomic context, so 103 * handler, but general ioremap can not be used in atomic context, so
@@ -114,12 +105,16 @@ static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
114 */ 105 */
115 106
116/* 107/*
117 * Two virtual pages are used, one for NMI context, the other for 108 * Two virtual pages are used, one for IRQ/PROCESS context, the other for
118 * IRQ/PROCESS context 109 * NMI context (optionally).
119 */ 110 */
120#define GHES_IOREMAP_PAGES 2 111#ifdef CONFIG_HAVE_ACPI_APEI_NMI
121#define GHES_IOREMAP_NMI_PAGE(base) (base) 112#define GHES_IOREMAP_PAGES 2
122#define GHES_IOREMAP_IRQ_PAGE(base) ((base) + PAGE_SIZE) 113#else
114#define GHES_IOREMAP_PAGES 1
115#endif
116#define GHES_IOREMAP_IRQ_PAGE(base) (base)
117#define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE)
123 118
124/* virtual memory area for atomic ioremap */ 119/* virtual memory area for atomic ioremap */
125static struct vm_struct *ghes_ioremap_area; 120static struct vm_struct *ghes_ioremap_area;
@@ -130,18 +125,8 @@ static struct vm_struct *ghes_ioremap_area;
130static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); 125static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
131static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); 126static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
132 127
133/*
134 * printk is not safe in NMI context. So in NMI handler, we allocate
135 * required memory from lock-less memory allocator
136 * (ghes_estatus_pool), save estatus into it, put them into lock-less
137 * list (ghes_estatus_llist), then delay printk into IRQ context via
138 * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
139 * required pool size by all NMI error source.
140 */
141static struct gen_pool *ghes_estatus_pool; 128static struct gen_pool *ghes_estatus_pool;
142static unsigned long ghes_estatus_pool_size_request; 129static unsigned long ghes_estatus_pool_size_request;
143static struct llist_head ghes_estatus_llist;
144static struct irq_work ghes_proc_irq_work;
145 130
146struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; 131struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
147static atomic_t ghes_estatus_cache_alloced; 132static atomic_t ghes_estatus_cache_alloced;
@@ -192,7 +177,7 @@ static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
192 177
193 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base)); 178 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
194 unmap_kernel_range_noflush(vaddr, PAGE_SIZE); 179 unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
195 __flush_tlb_one(vaddr); 180 arch_apei_flush_tlb_one(vaddr);
196} 181}
197 182
198static void ghes_iounmap_irq(void __iomem *vaddr_ptr) 183static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
@@ -202,7 +187,7 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
202 187
203 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base)); 188 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
204 unmap_kernel_range_noflush(vaddr, PAGE_SIZE); 189 unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
205 __flush_tlb_one(vaddr); 190 arch_apei_flush_tlb_one(vaddr);
206} 191}
207 192
208static int ghes_estatus_pool_init(void) 193static int ghes_estatus_pool_init(void)
@@ -249,11 +234,6 @@ static int ghes_estatus_pool_expand(unsigned long len)
249 return 0; 234 return 0;
250} 235}
251 236
252static void ghes_estatus_pool_shrink(unsigned long len)
253{
254 ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
255}
256
257static struct ghes *ghes_new(struct acpi_hest_generic *generic) 237static struct ghes *ghes_new(struct acpi_hest_generic *generic)
258{ 238{
259 struct ghes *ghes; 239 struct ghes *ghes;
@@ -455,9 +435,7 @@ static void ghes_do_proc(struct ghes *ghes,
455 mem_err = (struct cper_sec_mem_err *)(gdata+1); 435 mem_err = (struct cper_sec_mem_err *)(gdata+1);
456 ghes_edac_report_mem_error(ghes, sev, mem_err); 436 ghes_edac_report_mem_error(ghes, sev, mem_err);
457 437
458#ifdef CONFIG_X86_MCE 438 arch_apei_report_mem_error(sev, mem_err);
459 apei_mce_report_mem_error(sev, mem_err);
460#endif
461 ghes_handle_memory_failure(gdata, sev); 439 ghes_handle_memory_failure(gdata, sev);
462 } 440 }
463#ifdef CONFIG_ACPI_APEI_PCIEAER 441#ifdef CONFIG_ACPI_APEI_PCIEAER
@@ -734,6 +712,32 @@ static int ghes_notify_sci(struct notifier_block *this,
734 return ret; 712 return ret;
735} 713}
736 714
715static struct notifier_block ghes_notifier_sci = {
716 .notifier_call = ghes_notify_sci,
717};
718
719#ifdef CONFIG_HAVE_ACPI_APEI_NMI
720/*
721 * printk is not safe in NMI context. So in NMI handler, we allocate
722 * required memory from lock-less memory allocator
723 * (ghes_estatus_pool), save estatus into it, put them into lock-less
724 * list (ghes_estatus_llist), then delay printk into IRQ context via
725 * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
726 * required pool size by all NMI error source.
727 */
728static struct llist_head ghes_estatus_llist;
729static struct irq_work ghes_proc_irq_work;
730
731/*
732 * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
733 * mutual exclusion.
734 */
735static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
736
737static LIST_HEAD(ghes_nmi);
738
739static int ghes_panic_timeout __read_mostly = 30;
740
737static struct llist_node *llist_nodes_reverse(struct llist_node *llnode) 741static struct llist_node *llist_nodes_reverse(struct llist_node *llnode)
738{ 742{
739 struct llist_node *next, *tail = NULL; 743 struct llist_node *next, *tail = NULL;
@@ -877,10 +881,6 @@ out:
877 return ret; 881 return ret;
878} 882}
879 883
880static struct notifier_block ghes_notifier_sci = {
881 .notifier_call = ghes_notify_sci,
882};
883
884static unsigned long ghes_esource_prealloc_size( 884static unsigned long ghes_esource_prealloc_size(
885 const struct acpi_hest_generic *generic) 885 const struct acpi_hest_generic *generic)
886{ 886{
@@ -896,11 +896,71 @@ static unsigned long ghes_esource_prealloc_size(
896 return prealloc_size; 896 return prealloc_size;
897} 897}
898 898
899static void ghes_estatus_pool_shrink(unsigned long len)
900{
901 ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
902}
903
904static void ghes_nmi_add(struct ghes *ghes)
905{
906 unsigned long len;
907
908 len = ghes_esource_prealloc_size(ghes->generic);
909 ghes_estatus_pool_expand(len);
910 mutex_lock(&ghes_list_mutex);
911 if (list_empty(&ghes_nmi))
912 register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
913 list_add_rcu(&ghes->list, &ghes_nmi);
914 mutex_unlock(&ghes_list_mutex);
915}
916
917static void ghes_nmi_remove(struct ghes *ghes)
918{
919 unsigned long len;
920
921 mutex_lock(&ghes_list_mutex);
922 list_del_rcu(&ghes->list);
923 if (list_empty(&ghes_nmi))
924 unregister_nmi_handler(NMI_LOCAL, "ghes");
925 mutex_unlock(&ghes_list_mutex);
926 /*
927 * To synchronize with NMI handler, ghes can only be
928 * freed after NMI handler finishes.
929 */
930 synchronize_rcu();
931 len = ghes_esource_prealloc_size(ghes->generic);
932 ghes_estatus_pool_shrink(len);
933}
934
935static void ghes_nmi_init_cxt(void)
936{
937 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
938}
939#else /* CONFIG_HAVE_ACPI_APEI_NMI */
940static inline void ghes_nmi_add(struct ghes *ghes)
941{
942 pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
943 ghes->generic->header.source_id);
944 BUG();
945}
946
947static inline void ghes_nmi_remove(struct ghes *ghes)
948{
949 pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
950 ghes->generic->header.source_id);
951 BUG();
952}
953
954static inline void ghes_nmi_init_cxt(void)
955{
956}
957#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
958
899static int ghes_probe(struct platform_device *ghes_dev) 959static int ghes_probe(struct platform_device *ghes_dev)
900{ 960{
901 struct acpi_hest_generic *generic; 961 struct acpi_hest_generic *generic;
902 struct ghes *ghes = NULL; 962 struct ghes *ghes = NULL;
903 unsigned long len; 963
904 int rc = -EINVAL; 964 int rc = -EINVAL;
905 965
906 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; 966 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
@@ -911,7 +971,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
911 case ACPI_HEST_NOTIFY_POLLED: 971 case ACPI_HEST_NOTIFY_POLLED:
912 case ACPI_HEST_NOTIFY_EXTERNAL: 972 case ACPI_HEST_NOTIFY_EXTERNAL:
913 case ACPI_HEST_NOTIFY_SCI: 973 case ACPI_HEST_NOTIFY_SCI:
974 break;
914 case ACPI_HEST_NOTIFY_NMI: 975 case ACPI_HEST_NOTIFY_NMI:
976 if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
977 pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
978 generic->header.source_id);
979 goto err;
980 }
915 break; 981 break;
916 case ACPI_HEST_NOTIFY_LOCAL: 982 case ACPI_HEST_NOTIFY_LOCAL:
917 pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", 983 pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
@@ -972,14 +1038,7 @@ static int ghes_probe(struct platform_device *ghes_dev)
972 mutex_unlock(&ghes_list_mutex); 1038 mutex_unlock(&ghes_list_mutex);
973 break; 1039 break;
974 case ACPI_HEST_NOTIFY_NMI: 1040 case ACPI_HEST_NOTIFY_NMI:
975 len = ghes_esource_prealloc_size(generic); 1041 ghes_nmi_add(ghes);
976 ghes_estatus_pool_expand(len);
977 mutex_lock(&ghes_list_mutex);
978 if (list_empty(&ghes_nmi))
979 register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0,
980 "ghes");
981 list_add_rcu(&ghes->list, &ghes_nmi);
982 mutex_unlock(&ghes_list_mutex);
983 break; 1042 break;
984 default: 1043 default:
985 BUG(); 1044 BUG();
@@ -1001,7 +1060,6 @@ static int ghes_remove(struct platform_device *ghes_dev)
1001{ 1060{
1002 struct ghes *ghes; 1061 struct ghes *ghes;
1003 struct acpi_hest_generic *generic; 1062 struct acpi_hest_generic *generic;
1004 unsigned long len;
1005 1063
1006 ghes = platform_get_drvdata(ghes_dev); 1064 ghes = platform_get_drvdata(ghes_dev);
1007 generic = ghes->generic; 1065 generic = ghes->generic;
@@ -1022,18 +1080,7 @@ static int ghes_remove(struct platform_device *ghes_dev)
1022 mutex_unlock(&ghes_list_mutex); 1080 mutex_unlock(&ghes_list_mutex);
1023 break; 1081 break;
1024 case ACPI_HEST_NOTIFY_NMI: 1082 case ACPI_HEST_NOTIFY_NMI:
1025 mutex_lock(&ghes_list_mutex); 1083 ghes_nmi_remove(ghes);
1026 list_del_rcu(&ghes->list);
1027 if (list_empty(&ghes_nmi))
1028 unregister_nmi_handler(NMI_LOCAL, "ghes");
1029 mutex_unlock(&ghes_list_mutex);
1030 /*
1031 * To synchronize with NMI handler, ghes can only be
1032 * freed after NMI handler finishes.
1033 */
1034 synchronize_rcu();
1035 len = ghes_esource_prealloc_size(generic);
1036 ghes_estatus_pool_shrink(len);
1037 break; 1084 break;
1038 default: 1085 default:
1039 BUG(); 1086 BUG();
@@ -1077,7 +1124,7 @@ static int __init ghes_init(void)
1077 return -EINVAL; 1124 return -EINVAL;
1078 } 1125 }
1079 1126
1080 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); 1127 ghes_nmi_init_cxt();
1081 1128
1082 rc = ghes_ioremap_init(); 1129 rc = ghes_ioremap_init();
1083 if (rc) 1130 if (rc)
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index f5e37f32c71f..06e9b411a0a2 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -36,7 +36,6 @@
36#include <linux/io.h> 36#include <linux/io.h>
37#include <linux/platform_device.h> 37#include <linux/platform_device.h>
38#include <acpi/apei.h> 38#include <acpi/apei.h>
39#include <asm/mce.h>
40 39
41#include "apei-internal.h" 40#include "apei-internal.h"
42 41
@@ -128,33 +127,7 @@ EXPORT_SYMBOL_GPL(apei_hest_parse);
128 */ 127 */
129static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data) 128static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data)
130{ 129{
131#ifdef CONFIG_X86_MCE 130 return arch_apei_enable_cmcff(hest_hdr, data);
132 int i;
133 struct acpi_hest_ia_corrected *cmc;
134 struct acpi_hest_ia_error_bank *mc_bank;
135
136 if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
137 return 0;
138
139 cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
140 if (!cmc->enabled)
141 return 0;
142
143 /*
144 * We expect HEST to provide a list of MC banks that report errors
145 * in firmware first mode. Otherwise, return non-zero value to
146 * indicate that we are done parsing HEST.
147 */
148 if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks)
149 return 1;
150
151 pr_info(HEST_PFX "Enabling Firmware First mode for corrected errors.\n");
152
153 mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1);
154 for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
155 mce_disable_bank(mc_bank->bank_number);
156#endif
157 return 1;
158} 131}
159 132
160struct ghes_arr { 133struct ghes_arr {
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index e339c6b91425..f8665f9c3e03 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -72,6 +72,7 @@ config EDAC_MCE_INJ
72 72
73config EDAC_MM_EDAC 73config EDAC_MM_EDAC
74 tristate "Main Memory EDAC (Error Detection And Correction) reporting" 74 tristate "Main Memory EDAC (Error Detection And Correction) reporting"
75 select RAS
75 help 76 help
76 Some systems are able to detect and correct errors in main 77 Some systems are able to detect and correct errors in main
77 memory. EDAC can report statistics on memory error 78 memory. EDAC can report statistics on memory error
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 2c694b5297cc..9f134823fa75 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -33,9 +33,6 @@
33#include <asm/edac.h> 33#include <asm/edac.h>
34#include "edac_core.h" 34#include "edac_core.h"
35#include "edac_module.h" 35#include "edac_module.h"
36
37#define CREATE_TRACE_POINTS
38#define TRACE_INCLUDE_PATH ../../include/ras
39#include <ras/ras_event.h> 36#include <ras/ras_event.h>
40 37
41/* lock to memory controller's control array */ 38/* lock to memory controller's control array */
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 1491dd4f08f9..437e6fd47311 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -34,6 +34,9 @@
34#include <linux/aer.h> 34#include <linux/aer.h>
35 35
36#define INDENT_SP " " 36#define INDENT_SP " "
37
38static char rcd_decode_str[CPER_REC_LEN];
39
37/* 40/*
38 * CPER record ID need to be unique even after reboot, because record 41 * CPER record ID need to be unique even after reboot, because record
39 * ID is used as index for ERST storage, while CPER records from 42 * ID is used as index for ERST storage, while CPER records from
@@ -50,18 +53,19 @@ u64 cper_next_record_id(void)
50} 53}
51EXPORT_SYMBOL_GPL(cper_next_record_id); 54EXPORT_SYMBOL_GPL(cper_next_record_id);
52 55
53static const char *cper_severity_strs[] = { 56static const char * const severity_strs[] = {
54 "recoverable", 57 "recoverable",
55 "fatal", 58 "fatal",
56 "corrected", 59 "corrected",
57 "info", 60 "info",
58}; 61};
59 62
60static const char *cper_severity_str(unsigned int severity) 63const char *cper_severity_str(unsigned int severity)
61{ 64{
62 return severity < ARRAY_SIZE(cper_severity_strs) ? 65 return severity < ARRAY_SIZE(severity_strs) ?
63 cper_severity_strs[severity] : "unknown"; 66 severity_strs[severity] : "unknown";
64} 67}
68EXPORT_SYMBOL_GPL(cper_severity_str);
65 69
66/* 70/*
67 * cper_print_bits - print strings for set bits 71 * cper_print_bits - print strings for set bits
@@ -100,32 +104,32 @@ void cper_print_bits(const char *pfx, unsigned int bits,
100 printk("%s\n", buf); 104 printk("%s\n", buf);
101} 105}
102 106
103static const char * const cper_proc_type_strs[] = { 107static const char * const proc_type_strs[] = {
104 "IA32/X64", 108 "IA32/X64",
105 "IA64", 109 "IA64",
106}; 110};
107 111
108static const char * const cper_proc_isa_strs[] = { 112static const char * const proc_isa_strs[] = {
109 "IA32", 113 "IA32",
110 "IA64", 114 "IA64",
111 "X64", 115 "X64",
112}; 116};
113 117
114static const char * const cper_proc_error_type_strs[] = { 118static const char * const proc_error_type_strs[] = {
115 "cache error", 119 "cache error",
116 "TLB error", 120 "TLB error",
117 "bus error", 121 "bus error",
118 "micro-architectural error", 122 "micro-architectural error",
119}; 123};
120 124
121static const char * const cper_proc_op_strs[] = { 125static const char * const proc_op_strs[] = {
122 "unknown or generic", 126 "unknown or generic",
123 "data read", 127 "data read",
124 "data write", 128 "data write",
125 "instruction execution", 129 "instruction execution",
126}; 130};
127 131
128static const char * const cper_proc_flag_strs[] = { 132static const char * const proc_flag_strs[] = {
129 "restartable", 133 "restartable",
130 "precise IP", 134 "precise IP",
131 "overflow", 135 "overflow",
@@ -137,26 +141,26 @@ static void cper_print_proc_generic(const char *pfx,
137{ 141{
138 if (proc->validation_bits & CPER_PROC_VALID_TYPE) 142 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
139 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type, 143 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
140 proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ? 144 proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
141 cper_proc_type_strs[proc->proc_type] : "unknown"); 145 proc_type_strs[proc->proc_type] : "unknown");
142 if (proc->validation_bits & CPER_PROC_VALID_ISA) 146 if (proc->validation_bits & CPER_PROC_VALID_ISA)
143 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa, 147 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
144 proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ? 148 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
145 cper_proc_isa_strs[proc->proc_isa] : "unknown"); 149 proc_isa_strs[proc->proc_isa] : "unknown");
146 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) { 150 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
147 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type); 151 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
148 cper_print_bits(pfx, proc->proc_error_type, 152 cper_print_bits(pfx, proc->proc_error_type,
149 cper_proc_error_type_strs, 153 proc_error_type_strs,
150 ARRAY_SIZE(cper_proc_error_type_strs)); 154 ARRAY_SIZE(proc_error_type_strs));
151 } 155 }
152 if (proc->validation_bits & CPER_PROC_VALID_OPERATION) 156 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
153 printk("%s""operation: %d, %s\n", pfx, proc->operation, 157 printk("%s""operation: %d, %s\n", pfx, proc->operation,
154 proc->operation < ARRAY_SIZE(cper_proc_op_strs) ? 158 proc->operation < ARRAY_SIZE(proc_op_strs) ?
155 cper_proc_op_strs[proc->operation] : "unknown"); 159 proc_op_strs[proc->operation] : "unknown");
156 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) { 160 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
157 printk("%s""flags: 0x%02x\n", pfx, proc->flags); 161 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
158 cper_print_bits(pfx, proc->flags, cper_proc_flag_strs, 162 cper_print_bits(pfx, proc->flags, proc_flag_strs,
159 ARRAY_SIZE(cper_proc_flag_strs)); 163 ARRAY_SIZE(proc_flag_strs));
160 } 164 }
161 if (proc->validation_bits & CPER_PROC_VALID_LEVEL) 165 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
162 printk("%s""level: %d\n", pfx, proc->level); 166 printk("%s""level: %d\n", pfx, proc->level);
@@ -177,7 +181,7 @@ static void cper_print_proc_generic(const char *pfx,
177 printk("%s""IP: 0x%016llx\n", pfx, proc->ip); 181 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
178} 182}
179 183
180static const char *cper_mem_err_type_strs[] = { 184static const char * const mem_err_type_strs[] = {
181 "unknown", 185 "unknown",
182 "no error", 186 "no error",
183 "single-bit ECC", 187 "single-bit ECC",
@@ -196,58 +200,136 @@ static const char *cper_mem_err_type_strs[] = {
196 "physical memory map-out event", 200 "physical memory map-out event",
197}; 201};
198 202
199static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) 203const char *cper_mem_err_type_str(unsigned int etype)
200{ 204{
201 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) 205 return etype < ARRAY_SIZE(mem_err_type_strs) ?
202 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); 206 mem_err_type_strs[etype] : "unknown";
203 if (mem->validation_bits & CPER_MEM_VALID_PA) 207}
204 printk("%s""physical_address: 0x%016llx\n", 208EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
205 pfx, mem->physical_addr); 209
206 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) 210static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
207 printk("%s""physical_address_mask: 0x%016llx\n", 211{
208 pfx, mem->physical_addr_mask); 212 u32 len, n;
213
214 if (!msg)
215 return 0;
216
217 n = 0;
218 len = CPER_REC_LEN - 1;
209 if (mem->validation_bits & CPER_MEM_VALID_NODE) 219 if (mem->validation_bits & CPER_MEM_VALID_NODE)
210 pr_debug("node: %d\n", mem->node); 220 n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
211 if (mem->validation_bits & CPER_MEM_VALID_CARD) 221 if (mem->validation_bits & CPER_MEM_VALID_CARD)
212 pr_debug("card: %d\n", mem->card); 222 n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
213 if (mem->validation_bits & CPER_MEM_VALID_MODULE) 223 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
214 pr_debug("module: %d\n", mem->module); 224 n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
215 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) 225 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
216 pr_debug("rank: %d\n", mem->rank); 226 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
217 if (mem->validation_bits & CPER_MEM_VALID_BANK) 227 if (mem->validation_bits & CPER_MEM_VALID_BANK)
218 pr_debug("bank: %d\n", mem->bank); 228 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
219 if (mem->validation_bits & CPER_MEM_VALID_DEVICE) 229 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
220 pr_debug("device: %d\n", mem->device); 230 n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
221 if (mem->validation_bits & CPER_MEM_VALID_ROW) 231 if (mem->validation_bits & CPER_MEM_VALID_ROW)
222 pr_debug("row: %d\n", mem->row); 232 n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
223 if (mem->validation_bits & CPER_MEM_VALID_COLUMN) 233 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
224 pr_debug("column: %d\n", mem->column); 234 n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
225 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) 235 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
226 pr_debug("bit_position: %d\n", mem->bit_pos); 236 n += scnprintf(msg + n, len - n, "bit_position: %d ",
237 mem->bit_pos);
227 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) 238 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
228 pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id); 239 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
240 mem->requestor_id);
229 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) 241 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
230 pr_debug("responder_id: 0x%016llx\n", mem->responder_id); 242 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
243 mem->responder_id);
231 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) 244 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
232 pr_debug("target_id: 0x%016llx\n", mem->target_id); 245 scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
246 mem->target_id);
247
248 msg[n] = '\0';
249 return n;
250}
251
252static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
253{
254 u32 len, n;
255 const char *bank = NULL, *device = NULL;
256
257 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
258 return 0;
259
260 n = 0;
261 len = CPER_REC_LEN - 1;
262 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
263 if (bank && device)
264 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
265 else
266 n = snprintf(msg, len,
267 "DIMM location: not present. DMI handle: 0x%.4x ",
268 mem->mem_dev_handle);
269
270 msg[n] = '\0';
271 return n;
272}
273
274void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
275 struct cper_mem_err_compact *cmem)
276{
277 cmem->validation_bits = mem->validation_bits;
278 cmem->node = mem->node;
279 cmem->card = mem->card;
280 cmem->module = mem->module;
281 cmem->bank = mem->bank;
282 cmem->device = mem->device;
283 cmem->row = mem->row;
284 cmem->column = mem->column;
285 cmem->bit_pos = mem->bit_pos;
286 cmem->requestor_id = mem->requestor_id;
287 cmem->responder_id = mem->responder_id;
288 cmem->target_id = mem->target_id;
289 cmem->rank = mem->rank;
290 cmem->mem_array_handle = mem->mem_array_handle;
291 cmem->mem_dev_handle = mem->mem_dev_handle;
292}
293
294const char *cper_mem_err_unpack(struct trace_seq *p,
295 struct cper_mem_err_compact *cmem)
296{
297 const char *ret = p->buffer + p->len;
298
299 if (cper_mem_err_location(cmem, rcd_decode_str))
300 trace_seq_printf(p, "%s", rcd_decode_str);
301 if (cper_dimm_err_location(cmem, rcd_decode_str))
302 trace_seq_printf(p, "%s", rcd_decode_str);
303 trace_seq_putc(p, '\0');
304
305 return ret;
306}
307
308static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
309{
310 struct cper_mem_err_compact cmem;
311
312 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
313 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
314 if (mem->validation_bits & CPER_MEM_VALID_PA)
315 printk("%s""physical_address: 0x%016llx\n",
316 pfx, mem->physical_addr);
317 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
318 printk("%s""physical_address_mask: 0x%016llx\n",
319 pfx, mem->physical_addr_mask);
320 cper_mem_err_pack(mem, &cmem);
321 if (cper_mem_err_location(&cmem, rcd_decode_str))
322 printk("%s%s\n", pfx, rcd_decode_str);
233 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { 323 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
234 u8 etype = mem->error_type; 324 u8 etype = mem->error_type;
235 printk("%s""error_type: %d, %s\n", pfx, etype, 325 printk("%s""error_type: %d, %s\n", pfx, etype,
236 etype < ARRAY_SIZE(cper_mem_err_type_strs) ? 326 cper_mem_err_type_str(etype));
237 cper_mem_err_type_strs[etype] : "unknown");
238 }
239 if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
240 const char *bank = NULL, *device = NULL;
241 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
242 if (bank != NULL && device != NULL)
243 printk("%s""DIMM location: %s %s", pfx, bank, device);
244 else
245 printk("%s""DIMM DMI handle: 0x%.4x",
246 pfx, mem->mem_dev_handle);
247 } 327 }
328 if (cper_dimm_err_location(&cmem, rcd_decode_str))
329 printk("%s%s\n", pfx, rcd_decode_str);
248} 330}
249 331
250static const char *cper_pcie_port_type_strs[] = { 332static const char * const pcie_port_type_strs[] = {
251 "PCIe end point", 333 "PCIe end point",
252 "legacy PCI end point", 334 "legacy PCI end point",
253 "unknown", 335 "unknown",
@@ -266,8 +348,8 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
266{ 348{
267 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) 349 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
268 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, 350 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
269 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? 351 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
270 cper_pcie_port_type_strs[pcie->port_type] : "unknown"); 352 pcie_port_type_strs[pcie->port_type] : "unknown");
271 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION) 353 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
272 printk("%s""version: %d.%d\n", pfx, 354 printk("%s""version: %d.%d\n", pfx,
273 pcie->version.major, pcie->version.minor); 355 pcie->version.major, pcie->version.minor);
diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig
index 50e94e02378a..389440228c1d 100644
--- a/drivers/pci/pcie/aer/Kconfig
+++ b/drivers/pci/pcie/aer/Kconfig
@@ -5,6 +5,7 @@
5config PCIEAER 5config PCIEAER
6 boolean "Root Port Advanced Error Reporting support" 6 boolean "Root Port Advanced Error Reporting support"
7 depends on PCIEPORTBUS 7 depends on PCIEPORTBUS
8 select RAS
8 default y 9 default y
9 help 10 help
10 This enables PCI Express Root Port Advanced Error Reporting 11 This enables PCI Express Root Port Advanced Error Reporting
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 36ed31b52198..35d06e177917 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -22,9 +22,7 @@
22#include <linux/cper.h> 22#include <linux/cper.h>
23 23
24#include "aerdrv.h" 24#include "aerdrv.h"
25 25#include <ras/ras_event.h>
26#define CREATE_TRACE_POINTS
27#include <trace/events/ras.h>
28 26
29#define AER_AGENT_RECEIVER 0 27#define AER_AGENT_RECEIVER 0
30#define AER_AGENT_REQUESTER 1 28#define AER_AGENT_REQUESTER 1
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig
new file mode 100644
index 000000000000..f9da613052c2
--- /dev/null
+++ b/drivers/ras/Kconfig
@@ -0,0 +1,2 @@
1config RAS
2 bool
diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile
new file mode 100644
index 000000000000..d7f73341ced3
--- /dev/null
+++ b/drivers/ras/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_RAS) += ras.o debugfs.o
diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c
new file mode 100644
index 000000000000..0322acf67ea5
--- /dev/null
+++ b/drivers/ras/debugfs.c
@@ -0,0 +1,56 @@
1#include <linux/debugfs.h>
2
3static struct dentry *ras_debugfs_dir;
4
5static atomic_t trace_count = ATOMIC_INIT(0);
6
7int ras_userspace_consumers(void)
8{
9 return atomic_read(&trace_count);
10}
11EXPORT_SYMBOL_GPL(ras_userspace_consumers);
12
/*
 * seq_file show() callback for the "daemon_active" file.
 *
 * The file has no content: it only exists so that opening it can be
 * counted. A show() callback has to return 0 on success or a negative
 * errno; returning the (positive) open counter, as was done before, is not
 * a valid result for the seq_file core.
 */
static int trace_show(struct seq_file *m, void *v)
{
	return 0;
}
17
18static int trace_open(struct inode *inode, struct file *file)
19{
20 atomic_inc(&trace_count);
21 return single_open(file, trace_show, NULL);
22}
23
24static int trace_release(struct inode *inode, struct file *file)
25{
26 atomic_dec(&trace_count);
27 return single_release(inode, file);
28}
29
30static const struct file_operations trace_fops = {
31 .open = trace_open,
32 .read = seq_read,
33 .llseek = seq_lseek,
34 .release = trace_release,
35};
36
37int __init ras_add_daemon_trace(void)
38{
39 struct dentry *fentry;
40
41 if (!ras_debugfs_dir)
42 return -ENOENT;
43
44 fentry = debugfs_create_file("daemon_active", S_IRUSR, ras_debugfs_dir,
45 NULL, &trace_fops);
46 if (!fentry)
47 return -ENODEV;
48
49 return 0;
50
51}
52
53void __init ras_debugfs_init(void)
54{
55 ras_debugfs_dir = debugfs_create_dir("ras", NULL);
56}
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
new file mode 100644
index 000000000000..b67dd362b7b6
--- /dev/null
+++ b/drivers/ras/ras.c
@@ -0,0 +1,29 @@
1/*
2 * Copyright (C) 2014 Intel Corporation
3 *
4 * Authors:
5 * Chen, Gong <gong.chen@linux.intel.com>
6 */
7
8#include <linux/init.h>
9#include <linux/ras.h>
10
11#define CREATE_TRACE_POINTS
12#define TRACE_INCLUDE_PATH ../../include/ras
13#include <ras/ras_event.h>
14
15static int __init ras_init(void)
16{
17 int rc = 0;
18
19 ras_debugfs_init();
20 rc = ras_add_daemon_trace();
21
22 return rc;
23}
24subsys_initcall(ras_init);
25
/* Tracepoint symbols exported (GPL-only) from this file. */
EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
/* Exported only when CONFIG_ACPI_EXTLOG is built in or built as a module. */
#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE)
EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
#endif