aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/acpi/apei/ghes.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/acpi/apei/ghes.c')
-rw-r--r--drivers/acpi/apei/ghes.c173
1 files changed, 110 insertions, 63 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index dab7cb7349df..e05d84e7b06d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -47,11 +47,11 @@
47#include <linux/genalloc.h> 47#include <linux/genalloc.h>
48#include <linux/pci.h> 48#include <linux/pci.h>
49#include <linux/aer.h> 49#include <linux/aer.h>
50#include <linux/nmi.h>
50 51
51#include <acpi/ghes.h> 52#include <acpi/ghes.h>
52#include <asm/mce.h> 53#include <acpi/apei.h>
53#include <asm/tlbflush.h> 54#include <asm/tlbflush.h>
54#include <asm/nmi.h>
55 55
56#include "apei-internal.h" 56#include "apei-internal.h"
57 57
@@ -86,8 +86,6 @@
86bool ghes_disable; 86bool ghes_disable;
87module_param_named(disable, ghes_disable, bool, 0); 87module_param_named(disable, ghes_disable, bool, 0);
88 88
89static int ghes_panic_timeout __read_mostly = 30;
90
91/* 89/*
92 * All error sources notified with SCI share one notifier function, 90 * All error sources notified with SCI share one notifier function,
93 * so they need to be linked and checked one by one. This is applied 91 * so they need to be linked and checked one by one. This is applied
@@ -97,16 +95,9 @@ static int ghes_panic_timeout __read_mostly = 30;
97 * list changing, not for traversing. 95 * list changing, not for traversing.
98 */ 96 */
99static LIST_HEAD(ghes_sci); 97static LIST_HEAD(ghes_sci);
100static LIST_HEAD(ghes_nmi);
101static DEFINE_MUTEX(ghes_list_mutex); 98static DEFINE_MUTEX(ghes_list_mutex);
102 99
103/* 100/*
104 * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
105 * mutual exclusion.
106 */
107static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
108
109/*
110 * Because the memory area used to transfer hardware error information 101 * Because the memory area used to transfer hardware error information
111 * from BIOS to Linux can be determined only in NMI, IRQ or timer 102 * from BIOS to Linux can be determined only in NMI, IRQ or timer
112 * handler, but general ioremap can not be used in atomic context, so 103 * handler, but general ioremap can not be used in atomic context, so
@@ -114,12 +105,16 @@ static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
114 */ 105 */
115 106
116/* 107/*
117 * Two virtual pages are used, one for NMI context, the other for 108 * Two virtual pages are used, one for IRQ/PROCESS context, the other for
118 * IRQ/PROCESS context 109 * NMI context (optionally).
119 */ 110 */
120#define GHES_IOREMAP_PAGES 2 111#ifdef CONFIG_HAVE_ACPI_APEI_NMI
121#define GHES_IOREMAP_NMI_PAGE(base) (base) 112#define GHES_IOREMAP_PAGES 2
122#define GHES_IOREMAP_IRQ_PAGE(base) ((base) + PAGE_SIZE) 113#else
114#define GHES_IOREMAP_PAGES 1
115#endif
116#define GHES_IOREMAP_IRQ_PAGE(base) (base)
117#define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE)
123 118
124/* virtual memory area for atomic ioremap */ 119/* virtual memory area for atomic ioremap */
125static struct vm_struct *ghes_ioremap_area; 120static struct vm_struct *ghes_ioremap_area;
@@ -130,18 +125,8 @@ static struct vm_struct *ghes_ioremap_area;
130static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); 125static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
131static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); 126static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
132 127
133/*
134 * printk is not safe in NMI context. So in the NMI handler, we allocate
135 * the required memory from a lock-less memory allocator
136 * (ghes_estatus_pool), save the estatus into it, put it on a lock-less
137 * list (ghes_estatus_llist), then defer the printk to IRQ context via
138 * irq_work (ghes_proc_irq_work). ghes_estatus_pool_size_request records
139 * the pool size required by all NMI error sources.
140 */
141static struct gen_pool *ghes_estatus_pool; 128static struct gen_pool *ghes_estatus_pool;
142static unsigned long ghes_estatus_pool_size_request; 129static unsigned long ghes_estatus_pool_size_request;
143static struct llist_head ghes_estatus_llist;
144static struct irq_work ghes_proc_irq_work;
145 130
146struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; 131struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
147static atomic_t ghes_estatus_cache_alloced; 132static atomic_t ghes_estatus_cache_alloced;
@@ -192,7 +177,7 @@ static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
192 177
193 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base)); 178 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
194 unmap_kernel_range_noflush(vaddr, PAGE_SIZE); 179 unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
195 __flush_tlb_one(vaddr); 180 arch_apei_flush_tlb_one(vaddr);
196} 181}
197 182
198static void ghes_iounmap_irq(void __iomem *vaddr_ptr) 183static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
@@ -202,7 +187,7 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
202 187
203 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base)); 188 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
204 unmap_kernel_range_noflush(vaddr, PAGE_SIZE); 189 unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
205 __flush_tlb_one(vaddr); 190 arch_apei_flush_tlb_one(vaddr);
206} 191}
207 192
208static int ghes_estatus_pool_init(void) 193static int ghes_estatus_pool_init(void)
@@ -249,11 +234,6 @@ static int ghes_estatus_pool_expand(unsigned long len)
249 return 0; 234 return 0;
250} 235}
251 236
/*
 * Reduce ghes_estatus_pool_size_request by PAGE_ALIGN(len).
 * Only the size-request counter is adjusted here.
 */
252static void ghes_estatus_pool_shrink(unsigned long len)
253{
254 ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
255}
256
257static struct ghes *ghes_new(struct acpi_hest_generic *generic) 237static struct ghes *ghes_new(struct acpi_hest_generic *generic)
258{ 238{
259 struct ghes *ghes; 239 struct ghes *ghes;
@@ -455,9 +435,7 @@ static void ghes_do_proc(struct ghes *ghes,
455 mem_err = (struct cper_sec_mem_err *)(gdata+1); 435 mem_err = (struct cper_sec_mem_err *)(gdata+1);
456 ghes_edac_report_mem_error(ghes, sev, mem_err); 436 ghes_edac_report_mem_error(ghes, sev, mem_err);
457 437
458#ifdef CONFIG_X86_MCE 438 arch_apei_report_mem_error(sev, mem_err);
459 apei_mce_report_mem_error(sev, mem_err);
460#endif
461 ghes_handle_memory_failure(gdata, sev); 439 ghes_handle_memory_failure(gdata, sev);
462 } 440 }
463#ifdef CONFIG_ACPI_APEI_PCIEAER 441#ifdef CONFIG_ACPI_APEI_PCIEAER
@@ -734,6 +712,32 @@ static int ghes_notify_sci(struct notifier_block *this,
734 return ret; 712 return ret;
735} 713}
736 714
/* Notifier block whose callback is ghes_notify_sci(). */
715static struct notifier_block ghes_notifier_sci = {
716 .notifier_call = ghes_notify_sci,
717};
718
719#ifdef CONFIG_HAVE_ACPI_APEI_NMI
720/*
721 * printk is not safe in NMI context. So in the NMI handler, we allocate
722 * the required memory from a lock-less memory allocator
723 * (ghes_estatus_pool), save the estatus into it, put it on a lock-less
724 * list (ghes_estatus_llist), then defer the printk to IRQ context via
725 * irq_work (ghes_proc_irq_work). ghes_estatus_pool_size_request records
726 * the pool size required by all NMI error sources.
727 */
728static struct llist_head ghes_estatus_llist;
729static struct irq_work ghes_proc_irq_work;
730
731/*
732 * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
733 * mutual exclusion.
734 */
735static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
736
/*
 * Error sources added by ghes_nmi_add(); the list is modified while
 * holding ghes_list_mutex.
 */
737static LIST_HEAD(ghes_nmi);
738
739static int ghes_panic_timeout __read_mostly = 30;
740
737static struct llist_node *llist_nodes_reverse(struct llist_node *llnode) 741static struct llist_node *llist_nodes_reverse(struct llist_node *llnode)
738{ 742{
739 struct llist_node *next, *tail = NULL; 743 struct llist_node *next, *tail = NULL;
@@ -877,10 +881,6 @@ out:
877 return ret; 881 return ret;
878} 882}
879 883
/* Notifier block whose callback is ghes_notify_sci(). */
880static struct notifier_block ghes_notifier_sci = {
881 .notifier_call = ghes_notify_sci,
882};
883
884static unsigned long ghes_esource_prealloc_size( 884static unsigned long ghes_esource_prealloc_size(
885 const struct acpi_hest_generic *generic) 885 const struct acpi_hest_generic *generic)
886{ 886{
@@ -896,11 +896,71 @@ static unsigned long ghes_esource_prealloc_size(
896 return prealloc_size; 896 return prealloc_size;
897} 897}
898 898
/*
 * Reduce ghes_estatus_pool_size_request by PAGE_ALIGN(len).
 * Only the size-request counter is adjusted here.
 */
899static void ghes_estatus_pool_shrink(unsigned long len)
900{
901 ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
902}
903
/*
 * Add @ghes to the set of NMI-notified error sources.
 *
 * Passes the size computed by ghes_esource_prealloc_size() to
 * ghes_estatus_pool_expand(), then, while holding ghes_list_mutex,
 * links @ghes into the ghes_nmi list. The "ghes" NMI handler
 * (ghes_notify_nmi, NMI_LOCAL) is registered only if the list is empty
 * at that point, i.e. before the first entry is added.
 *
 * NOTE(review): the int result of ghes_estatus_pool_expand() is not
 * checked here.
 */
904static void ghes_nmi_add(struct ghes *ghes)
905{
906 unsigned long len;
907
908 len = ghes_esource_prealloc_size(ghes->generic);
909 ghes_estatus_pool_expand(len);
910 mutex_lock(&ghes_list_mutex);
911 if (list_empty(&ghes_nmi))
912 register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
913 list_add_rcu(&ghes->list, &ghes_nmi);
914 mutex_unlock(&ghes_list_mutex);
915}
916
/*
 * Remove @ghes from the set of NMI-notified error sources.
 *
 * While holding ghes_list_mutex, unlinks @ghes from the ghes_nmi list
 * and unregisters the "ghes" NMI handler if the list is then empty.
 * After synchronize_rcu() returns, the length computed by
 * ghes_esource_prealloc_size() is passed to ghes_estatus_pool_shrink().
 */
917static void ghes_nmi_remove(struct ghes *ghes)
918{
919 unsigned long len;
920
921 mutex_lock(&ghes_list_mutex);
922 list_del_rcu(&ghes->list);
923 if (list_empty(&ghes_nmi))
924 unregister_nmi_handler(NMI_LOCAL, "ghes");
925 mutex_unlock(&ghes_list_mutex);
926 /*
927 * To synchronize with NMI handler, ghes can only be
928 * freed after NMI handler finishes.
929 */
930 synchronize_rcu();
931 len = ghes_esource_prealloc_size(ghes->generic);
932 ghes_estatus_pool_shrink(len);
933}
934
/* Initialise ghes_proc_irq_work with ghes_proc_in_irq as its function. */
935static void ghes_nmi_init_cxt(void)
936{
937 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
938}
939#else /* CONFIG_HAVE_ACPI_APEI_NMI */
/*
 * Variant used when CONFIG_HAVE_ACPI_APEI_NMI is not set: logs an error
 * naming the source ID and then hits BUG(). ghes_probe() jumps to its
 * error path for NMI-notified sources when the option is off.
 */
940static inline void ghes_nmi_add(struct ghes *ghes)
941{
942 pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n",
943 ghes->generic->header.source_id);
944 BUG();
945}
946
/*
 * Variant used when CONFIG_HAVE_ACPI_APEI_NMI is not set: logs an error
 * naming the source ID and then hits BUG().
 */
947static inline void ghes_nmi_remove(struct ghes *ghes)
948{
949 pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n",
950 ghes->generic->header.source_id);
951 BUG();
952}
953
/* No-op variant used when CONFIG_HAVE_ACPI_APEI_NMI is not set. */
954static inline void ghes_nmi_init_cxt(void)
955{
956}
957#endif /* CONFIG_HAVE_ACPI_APEI_NMI */
958
899static int ghes_probe(struct platform_device *ghes_dev) 959static int ghes_probe(struct platform_device *ghes_dev)
900{ 960{
901 struct acpi_hest_generic *generic; 961 struct acpi_hest_generic *generic;
902 struct ghes *ghes = NULL; 962 struct ghes *ghes = NULL;
903 unsigned long len; 963
904 int rc = -EINVAL; 964 int rc = -EINVAL;
905 965
906 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; 966 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
@@ -911,7 +971,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
911 case ACPI_HEST_NOTIFY_POLLED: 971 case ACPI_HEST_NOTIFY_POLLED:
912 case ACPI_HEST_NOTIFY_EXTERNAL: 972 case ACPI_HEST_NOTIFY_EXTERNAL:
913 case ACPI_HEST_NOTIFY_SCI: 973 case ACPI_HEST_NOTIFY_SCI:
974 break;
914 case ACPI_HEST_NOTIFY_NMI: 975 case ACPI_HEST_NOTIFY_NMI:
976 if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
977 pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
978 generic->header.source_id);
979 goto err;
980 }
915 break; 981 break;
916 case ACPI_HEST_NOTIFY_LOCAL: 982 case ACPI_HEST_NOTIFY_LOCAL:
917 pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", 983 pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
@@ -972,14 +1038,7 @@ static int ghes_probe(struct platform_device *ghes_dev)
972 mutex_unlock(&ghes_list_mutex); 1038 mutex_unlock(&ghes_list_mutex);
973 break; 1039 break;
974 case ACPI_HEST_NOTIFY_NMI: 1040 case ACPI_HEST_NOTIFY_NMI:
975 len = ghes_esource_prealloc_size(generic); 1041 ghes_nmi_add(ghes);
976 ghes_estatus_pool_expand(len);
977 mutex_lock(&ghes_list_mutex);
978 if (list_empty(&ghes_nmi))
979 register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0,
980 "ghes");
981 list_add_rcu(&ghes->list, &ghes_nmi);
982 mutex_unlock(&ghes_list_mutex);
983 break; 1042 break;
984 default: 1043 default:
985 BUG(); 1044 BUG();
@@ -1001,7 +1060,6 @@ static int ghes_remove(struct platform_device *ghes_dev)
1001{ 1060{
1002 struct ghes *ghes; 1061 struct ghes *ghes;
1003 struct acpi_hest_generic *generic; 1062 struct acpi_hest_generic *generic;
1004 unsigned long len;
1005 1063
1006 ghes = platform_get_drvdata(ghes_dev); 1064 ghes = platform_get_drvdata(ghes_dev);
1007 generic = ghes->generic; 1065 generic = ghes->generic;
@@ -1022,18 +1080,7 @@ static int ghes_remove(struct platform_device *ghes_dev)
1022 mutex_unlock(&ghes_list_mutex); 1080 mutex_unlock(&ghes_list_mutex);
1023 break; 1081 break;
1024 case ACPI_HEST_NOTIFY_NMI: 1082 case ACPI_HEST_NOTIFY_NMI:
1025 mutex_lock(&ghes_list_mutex); 1083 ghes_nmi_remove(ghes);
1026 list_del_rcu(&ghes->list);
1027 if (list_empty(&ghes_nmi))
1028 unregister_nmi_handler(NMI_LOCAL, "ghes");
1029 mutex_unlock(&ghes_list_mutex);
1030 /*
1031 * To synchronize with NMI handler, ghes can only be
1032 * freed after NMI handler finishes.
1033 */
1034 synchronize_rcu();
1035 len = ghes_esource_prealloc_size(generic);
1036 ghes_estatus_pool_shrink(len);
1037 break; 1084 break;
1038 default: 1085 default:
1039 BUG(); 1086 BUG();
@@ -1077,7 +1124,7 @@ static int __init ghes_init(void)
1077 return -EINVAL; 1124 return -EINVAL;
1078 } 1125 }
1079 1126
1080 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); 1127 ghes_nmi_init_cxt();
1081 1128
1082 rc = ghes_ioremap_init(); 1129 rc = ghes_ioremap_init();
1083 if (rc) 1130 if (rc)