diff options
Diffstat (limited to 'drivers/acpi/apei/ghes.c')
-rw-r--r-- | drivers/acpi/apei/ghes.c | 173 |
1 files changed, 110 insertions, 63 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index dab7cb7349df..e05d84e7b06d 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c | |||
@@ -47,11 +47,11 @@ | |||
47 | #include <linux/genalloc.h> | 47 | #include <linux/genalloc.h> |
48 | #include <linux/pci.h> | 48 | #include <linux/pci.h> |
49 | #include <linux/aer.h> | 49 | #include <linux/aer.h> |
50 | #include <linux/nmi.h> | ||
50 | 51 | ||
51 | #include <acpi/ghes.h> | 52 | #include <acpi/ghes.h> |
52 | #include <asm/mce.h> | 53 | #include <acpi/apei.h> |
53 | #include <asm/tlbflush.h> | 54 | #include <asm/tlbflush.h> |
54 | #include <asm/nmi.h> | ||
55 | 55 | ||
56 | #include "apei-internal.h" | 56 | #include "apei-internal.h" |
57 | 57 | ||
@@ -86,8 +86,6 @@ | |||
86 | bool ghes_disable; | 86 | bool ghes_disable; |
87 | module_param_named(disable, ghes_disable, bool, 0); | 87 | module_param_named(disable, ghes_disable, bool, 0); |
88 | 88 | ||
89 | static int ghes_panic_timeout __read_mostly = 30; | ||
90 | |||
91 | /* | 89 | /* |
92 | * All error sources notified with SCI shares one notifier function, | 90 | * All error sources notified with SCI shares one notifier function, |
93 | * so they need to be linked and checked one by one. This is applied | 91 | * so they need to be linked and checked one by one. This is applied |
@@ -97,16 +95,9 @@ static int ghes_panic_timeout __read_mostly = 30; | |||
97 | * list changing, not for traversing. | 95 | * list changing, not for traversing. |
98 | */ | 96 | */ |
99 | static LIST_HEAD(ghes_sci); | 97 | static LIST_HEAD(ghes_sci); |
100 | static LIST_HEAD(ghes_nmi); | ||
101 | static DEFINE_MUTEX(ghes_list_mutex); | 98 | static DEFINE_MUTEX(ghes_list_mutex); |
102 | 99 | ||
103 | /* | 100 | /* |
104 | * NMI may be triggered on any CPU, so ghes_nmi_lock is used for | ||
105 | * mutual exclusion. | ||
106 | */ | ||
107 | static DEFINE_RAW_SPINLOCK(ghes_nmi_lock); | ||
108 | |||
109 | /* | ||
110 | * Because the memory area used to transfer hardware error information | 101 | * Because the memory area used to transfer hardware error information |
111 | * from BIOS to Linux can be determined only in NMI, IRQ or timer | 102 | * from BIOS to Linux can be determined only in NMI, IRQ or timer |
112 | * handler, but general ioremap can not be used in atomic context, so | 103 | * handler, but general ioremap can not be used in atomic context, so |
@@ -114,12 +105,16 @@ static DEFINE_RAW_SPINLOCK(ghes_nmi_lock); | |||
114 | */ | 105 | */ |
115 | 106 | ||
116 | /* | 107 | /* |
117 | * Two virtual pages are used, one for NMI context, the other for | 108 | * Two virtual pages are used, one for IRQ/PROCESS context, the other for |
118 | * IRQ/PROCESS context | 109 | * NMI context (optionally). |
119 | */ | 110 | */ |
120 | #define GHES_IOREMAP_PAGES 2 | 111 | #ifdef CONFIG_HAVE_ACPI_APEI_NMI |
121 | #define GHES_IOREMAP_NMI_PAGE(base) (base) | 112 | #define GHES_IOREMAP_PAGES 2 |
122 | #define GHES_IOREMAP_IRQ_PAGE(base) ((base) + PAGE_SIZE) | 113 | #else |
114 | #define GHES_IOREMAP_PAGES 1 | ||
115 | #endif | ||
116 | #define GHES_IOREMAP_IRQ_PAGE(base) (base) | ||
117 | #define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE) | ||
123 | 118 | ||
124 | /* virtual memory area for atomic ioremap */ | 119 | /* virtual memory area for atomic ioremap */ |
125 | static struct vm_struct *ghes_ioremap_area; | 120 | static struct vm_struct *ghes_ioremap_area; |
@@ -130,18 +125,8 @@ static struct vm_struct *ghes_ioremap_area; | |||
130 | static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); | 125 | static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); |
131 | static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); | 126 | static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); |
132 | 127 | ||
133 | /* | ||
134 | * printk is not safe in NMI context. So in NMI handler, we allocate | ||
135 | * required memory from lock-less memory allocator | ||
136 | * (ghes_estatus_pool), save estatus into it, put them into lock-less | ||
137 | * list (ghes_estatus_llist), then delay printk into IRQ context via | ||
138 | * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record | ||
139 | * required pool size by all NMI error source. | ||
140 | */ | ||
141 | static struct gen_pool *ghes_estatus_pool; | 128 | static struct gen_pool *ghes_estatus_pool; |
142 | static unsigned long ghes_estatus_pool_size_request; | 129 | static unsigned long ghes_estatus_pool_size_request; |
143 | static struct llist_head ghes_estatus_llist; | ||
144 | static struct irq_work ghes_proc_irq_work; | ||
145 | 130 | ||
146 | struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; | 131 | struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; |
147 | static atomic_t ghes_estatus_cache_alloced; | 132 | static atomic_t ghes_estatus_cache_alloced; |
@@ -192,7 +177,7 @@ static void ghes_iounmap_nmi(void __iomem *vaddr_ptr) | |||
192 | 177 | ||
193 | BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base)); | 178 | BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base)); |
194 | unmap_kernel_range_noflush(vaddr, PAGE_SIZE); | 179 | unmap_kernel_range_noflush(vaddr, PAGE_SIZE); |
195 | __flush_tlb_one(vaddr); | 180 | arch_apei_flush_tlb_one(vaddr); |
196 | } | 181 | } |
197 | 182 | ||
198 | static void ghes_iounmap_irq(void __iomem *vaddr_ptr) | 183 | static void ghes_iounmap_irq(void __iomem *vaddr_ptr) |
@@ -202,7 +187,7 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr) | |||
202 | 187 | ||
203 | BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base)); | 188 | BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base)); |
204 | unmap_kernel_range_noflush(vaddr, PAGE_SIZE); | 189 | unmap_kernel_range_noflush(vaddr, PAGE_SIZE); |
205 | __flush_tlb_one(vaddr); | 190 | arch_apei_flush_tlb_one(vaddr); |
206 | } | 191 | } |
207 | 192 | ||
208 | static int ghes_estatus_pool_init(void) | 193 | static int ghes_estatus_pool_init(void) |
@@ -249,11 +234,6 @@ static int ghes_estatus_pool_expand(unsigned long len) | |||
249 | return 0; | 234 | return 0; |
250 | } | 235 | } |
251 | 236 | ||
252 | static void ghes_estatus_pool_shrink(unsigned long len) | ||
253 | { | ||
254 | ghes_estatus_pool_size_request -= PAGE_ALIGN(len); | ||
255 | } | ||
256 | |||
257 | static struct ghes *ghes_new(struct acpi_hest_generic *generic) | 237 | static struct ghes *ghes_new(struct acpi_hest_generic *generic) |
258 | { | 238 | { |
259 | struct ghes *ghes; | 239 | struct ghes *ghes; |
@@ -455,9 +435,7 @@ static void ghes_do_proc(struct ghes *ghes, | |||
455 | mem_err = (struct cper_sec_mem_err *)(gdata+1); | 435 | mem_err = (struct cper_sec_mem_err *)(gdata+1); |
456 | ghes_edac_report_mem_error(ghes, sev, mem_err); | 436 | ghes_edac_report_mem_error(ghes, sev, mem_err); |
457 | 437 | ||
458 | #ifdef CONFIG_X86_MCE | 438 | arch_apei_report_mem_error(sev, mem_err); |
459 | apei_mce_report_mem_error(sev, mem_err); | ||
460 | #endif | ||
461 | ghes_handle_memory_failure(gdata, sev); | 439 | ghes_handle_memory_failure(gdata, sev); |
462 | } | 440 | } |
463 | #ifdef CONFIG_ACPI_APEI_PCIEAER | 441 | #ifdef CONFIG_ACPI_APEI_PCIEAER |
@@ -734,6 +712,32 @@ static int ghes_notify_sci(struct notifier_block *this, | |||
734 | return ret; | 712 | return ret; |
735 | } | 713 | } |
736 | 714 | ||
715 | static struct notifier_block ghes_notifier_sci = { | ||
716 | .notifier_call = ghes_notify_sci, | ||
717 | }; | ||
718 | |||
719 | #ifdef CONFIG_HAVE_ACPI_APEI_NMI | ||
720 | /* | ||
721 | * printk is not safe in NMI context. So in NMI handler, we allocate | ||
722 | * required memory from lock-less memory allocator | ||
723 | * (ghes_estatus_pool), save estatus into it, put them into lock-less | ||
724 | * list (ghes_estatus_llist), then delay printk into IRQ context via | ||
725 | * irq_work (ghes_proc_irq_work). ghes_estatus_pool_size_request | ||
726 | * records the pool size required by all NMI error sources. | ||
727 | */ | ||
728 | static struct llist_head ghes_estatus_llist; | ||
729 | static struct irq_work ghes_proc_irq_work; | ||
730 | |||
731 | /* | ||
732 | * NMI may be triggered on any CPU, so ghes_nmi_lock is used for | ||
733 | * mutual exclusion. | ||
734 | */ | ||
735 | static DEFINE_RAW_SPINLOCK(ghes_nmi_lock); | ||
736 | |||
737 | static LIST_HEAD(ghes_nmi); | ||
738 | |||
739 | static int ghes_panic_timeout __read_mostly = 30; | ||
740 | |||
737 | static struct llist_node *llist_nodes_reverse(struct llist_node *llnode) | 741 | static struct llist_node *llist_nodes_reverse(struct llist_node *llnode) |
738 | { | 742 | { |
739 | struct llist_node *next, *tail = NULL; | 743 | struct llist_node *next, *tail = NULL; |
@@ -877,10 +881,6 @@ out: | |||
877 | return ret; | 881 | return ret; |
878 | } | 882 | } |
879 | 883 | ||
880 | static struct notifier_block ghes_notifier_sci = { | ||
881 | .notifier_call = ghes_notify_sci, | ||
882 | }; | ||
883 | |||
884 | static unsigned long ghes_esource_prealloc_size( | 884 | static unsigned long ghes_esource_prealloc_size( |
885 | const struct acpi_hest_generic *generic) | 885 | const struct acpi_hest_generic *generic) |
886 | { | 886 | { |
@@ -896,11 +896,71 @@ static unsigned long ghes_esource_prealloc_size( | |||
896 | return prealloc_size; | 896 | return prealloc_size; |
897 | } | 897 | } |
898 | 898 | ||
899 | static void ghes_estatus_pool_shrink(unsigned long len) | ||
900 | { | ||
901 | ghes_estatus_pool_size_request -= PAGE_ALIGN(len); | ||
902 | } | ||
903 | |||
904 | static void ghes_nmi_add(struct ghes *ghes) | ||
905 | { | ||
906 | unsigned long len; | ||
907 | |||
908 | len = ghes_esource_prealloc_size(ghes->generic); | ||
909 | ghes_estatus_pool_expand(len); | ||
910 | mutex_lock(&ghes_list_mutex); | ||
911 | if (list_empty(&ghes_nmi)) | ||
912 | register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes"); | ||
913 | list_add_rcu(&ghes->list, &ghes_nmi); | ||
914 | mutex_unlock(&ghes_list_mutex); | ||
915 | } | ||
916 | |||
917 | static void ghes_nmi_remove(struct ghes *ghes) | ||
918 | { | ||
919 | unsigned long len; | ||
920 | |||
921 | mutex_lock(&ghes_list_mutex); | ||
922 | list_del_rcu(&ghes->list); | ||
923 | if (list_empty(&ghes_nmi)) | ||
924 | unregister_nmi_handler(NMI_LOCAL, "ghes"); | ||
925 | mutex_unlock(&ghes_list_mutex); | ||
926 | /* | ||
927 | * To synchronize with NMI handler, ghes can only be | ||
928 | * freed after NMI handler finishes. | ||
929 | */ | ||
930 | synchronize_rcu(); | ||
931 | len = ghes_esource_prealloc_size(ghes->generic); | ||
932 | ghes_estatus_pool_shrink(len); | ||
933 | } | ||
934 | |||
935 | static void ghes_nmi_init_cxt(void) | ||
936 | { | ||
937 | init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); | ||
938 | } | ||
939 | #else /* CONFIG_HAVE_ACPI_APEI_NMI */ | ||
940 | static inline void ghes_nmi_add(struct ghes *ghes) | ||
941 | { | ||
942 | pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n", | ||
943 | ghes->generic->header.source_id); | ||
944 | BUG(); | ||
945 | } | ||
946 | |||
947 | static inline void ghes_nmi_remove(struct ghes *ghes) | ||
948 | { | ||
949 | pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n", | ||
950 | ghes->generic->header.source_id); | ||
951 | BUG(); | ||
952 | } | ||
953 | |||
954 | static inline void ghes_nmi_init_cxt(void) | ||
955 | { | ||
956 | } | ||
957 | #endif /* CONFIG_HAVE_ACPI_APEI_NMI */ | ||
958 | |||
899 | static int ghes_probe(struct platform_device *ghes_dev) | 959 | static int ghes_probe(struct platform_device *ghes_dev) |
900 | { | 960 | { |
901 | struct acpi_hest_generic *generic; | 961 | struct acpi_hest_generic *generic; |
902 | struct ghes *ghes = NULL; | 962 | struct ghes *ghes = NULL; |
903 | unsigned long len; | 963 | |
904 | int rc = -EINVAL; | 964 | int rc = -EINVAL; |
905 | 965 | ||
906 | generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; | 966 | generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; |
@@ -911,7 +971,13 @@ static int ghes_probe(struct platform_device *ghes_dev) | |||
911 | case ACPI_HEST_NOTIFY_POLLED: | 971 | case ACPI_HEST_NOTIFY_POLLED: |
912 | case ACPI_HEST_NOTIFY_EXTERNAL: | 972 | case ACPI_HEST_NOTIFY_EXTERNAL: |
913 | case ACPI_HEST_NOTIFY_SCI: | 973 | case ACPI_HEST_NOTIFY_SCI: |
974 | break; | ||
914 | case ACPI_HEST_NOTIFY_NMI: | 975 | case ACPI_HEST_NOTIFY_NMI: |
976 | if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { | ||
977 | pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", | ||
978 | generic->header.source_id); | ||
979 | goto err; | ||
980 | } | ||
915 | break; | 981 | break; |
916 | case ACPI_HEST_NOTIFY_LOCAL: | 982 | case ACPI_HEST_NOTIFY_LOCAL: |
917 | pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", | 983 | pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", |
@@ -972,14 +1038,7 @@ static int ghes_probe(struct platform_device *ghes_dev) | |||
972 | mutex_unlock(&ghes_list_mutex); | 1038 | mutex_unlock(&ghes_list_mutex); |
973 | break; | 1039 | break; |
974 | case ACPI_HEST_NOTIFY_NMI: | 1040 | case ACPI_HEST_NOTIFY_NMI: |
975 | len = ghes_esource_prealloc_size(generic); | 1041 | ghes_nmi_add(ghes); |
976 | ghes_estatus_pool_expand(len); | ||
977 | mutex_lock(&ghes_list_mutex); | ||
978 | if (list_empty(&ghes_nmi)) | ||
979 | register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, | ||
980 | "ghes"); | ||
981 | list_add_rcu(&ghes->list, &ghes_nmi); | ||
982 | mutex_unlock(&ghes_list_mutex); | ||
983 | break; | 1042 | break; |
984 | default: | 1043 | default: |
985 | BUG(); | 1044 | BUG(); |
@@ -1001,7 +1060,6 @@ static int ghes_remove(struct platform_device *ghes_dev) | |||
1001 | { | 1060 | { |
1002 | struct ghes *ghes; | 1061 | struct ghes *ghes; |
1003 | struct acpi_hest_generic *generic; | 1062 | struct acpi_hest_generic *generic; |
1004 | unsigned long len; | ||
1005 | 1063 | ||
1006 | ghes = platform_get_drvdata(ghes_dev); | 1064 | ghes = platform_get_drvdata(ghes_dev); |
1007 | generic = ghes->generic; | 1065 | generic = ghes->generic; |
@@ -1022,18 +1080,7 @@ static int ghes_remove(struct platform_device *ghes_dev) | |||
1022 | mutex_unlock(&ghes_list_mutex); | 1080 | mutex_unlock(&ghes_list_mutex); |
1023 | break; | 1081 | break; |
1024 | case ACPI_HEST_NOTIFY_NMI: | 1082 | case ACPI_HEST_NOTIFY_NMI: |
1025 | mutex_lock(&ghes_list_mutex); | 1083 | ghes_nmi_remove(ghes); |
1026 | list_del_rcu(&ghes->list); | ||
1027 | if (list_empty(&ghes_nmi)) | ||
1028 | unregister_nmi_handler(NMI_LOCAL, "ghes"); | ||
1029 | mutex_unlock(&ghes_list_mutex); | ||
1030 | /* | ||
1031 | * To synchronize with NMI handler, ghes can only be | ||
1032 | * freed after NMI handler finishes. | ||
1033 | */ | ||
1034 | synchronize_rcu(); | ||
1035 | len = ghes_esource_prealloc_size(generic); | ||
1036 | ghes_estatus_pool_shrink(len); | ||
1037 | break; | 1084 | break; |
1038 | default: | 1085 | default: |
1039 | BUG(); | 1086 | BUG(); |
@@ -1077,7 +1124,7 @@ static int __init ghes_init(void) | |||
1077 | return -EINVAL; | 1124 | return -EINVAL; |
1078 | } | 1125 | } |
1079 | 1126 | ||
1080 | init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); | 1127 | ghes_nmi_init_cxt(); |
1081 | 1128 | ||
1082 | rc = ghes_ioremap_init(); | 1129 | rc = ghes_ioremap_init(); |
1083 | if (rc) | 1130 | if (rc) |