aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLen Brown <len.brown@intel.com>2011-01-12 05:02:22 -0500
committerLen Brown <len.brown@intel.com>2011-01-12 05:02:22 -0500
commit03b6e6e58d9dd5f3068288653810db3c15fde929 (patch)
treeb6edb300df2e230230f1f7428ffa54f27b0a040d /drivers
parent9e0c20bceb611917a7546172ac617d07e5af0cfa (diff)
parent81e88fdc432a1552401d6e91a984dcccce72b8dc (diff)
Merge branch 'apei' into release
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/apei/apei-internal.h2
-rw-r--r--drivers/acpi/apei/cper.c311
-rw-r--r--drivers/acpi/apei/ghes.c431
3 files changed, 656 insertions, 88 deletions
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index 18df1e940276..ef0581f2094d 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -109,6 +109,8 @@ static inline u32 apei_estatus_len(struct acpi_hest_generic_status *estatus)
109 return sizeof(*estatus) + estatus->data_length; 109 return sizeof(*estatus) + estatus->data_length;
110} 110}
111 111
112void apei_estatus_print(const char *pfx,
113 const struct acpi_hest_generic_status *estatus);
112int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus); 114int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus);
113int apei_estatus_check(const struct acpi_hest_generic_status *estatus); 115int apei_estatus_check(const struct acpi_hest_generic_status *estatus);
114#endif 116#endif
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index f4cf2fc4c8c1..31464a006d76 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -46,6 +46,317 @@ u64 cper_next_record_id(void)
46} 46}
47EXPORT_SYMBOL_GPL(cper_next_record_id); 47EXPORT_SYMBOL_GPL(cper_next_record_id);
48 48
/* Names of the CPER severity values, indexed by severity. */
static const char *cper_severity_strs[] = {
	"recoverable",
	"fatal",
	"corrected",
	"info",
};

/*
 * cper_severity_str - map a CPER severity value to its name
 * @severity: severity value, used as an index into cper_severity_strs
 *
 * Returns the matching table entry, or "unknown" when @severity lies
 * beyond the end of the table.
 */
static const char *cper_severity_str(unsigned int severity)
{
	if (severity >= ARRAY_SIZE(cper_severity_strs))
		return "unknown";

	return cper_severity_strs[severity];
}
61
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: number of entries in @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs,
 * separated by ", ".  When appending the next string would make the
 * current line longer than 80 characters, that line is printed and a
 * new one is started; every line begins with @pfx.
 */
static void cper_print_bits(const char *pfx, unsigned int bits,
			    const char *strs[], unsigned int strs_size)
{
	char line[84];
	int used = 0;
	int bit;

	for (bit = 0; bit < strs_size; bit++) {
		const char *name;

		if ((bits & (1U << bit)) == 0)
			continue;
		name = strs[bit];

		/* Flush the pending line if this name would not fit on it. */
		if (used != 0 && used + strlen(name) + 2 > 80) {
			printk("%s\n", line);
			used = 0;
		}

		if (used == 0)
			used = snprintf(line, sizeof(line), "%s%s", pfx, name);
		else
			used += snprintf(line + used, sizeof(line) - used,
					 ", %s", name);
	}

	/* Emit whatever is left on the last, partially filled line. */
	if (used != 0)
		printk("%s\n", line);
}
96
97static const char *cper_proc_type_strs[] = {
98 "IA32/X64",
99 "IA64",
100};
101
102static const char *cper_proc_isa_strs[] = {
103 "IA32",
104 "IA64",
105 "X64",
106};
107
108static const char *cper_proc_error_type_strs[] = {
109 "cache error",
110 "TLB error",
111 "bus error",
112 "micro-architectural error",
113};
114
115static const char *cper_proc_op_strs[] = {
116 "unknown or generic",
117 "data read",
118 "data write",
119 "instruction execution",
120};
121
122static const char *cper_proc_flag_strs[] = {
123 "restartable",
124 "precise IP",
125 "overflow",
126 "corrected",
127};
128
129static void cper_print_proc_generic(const char *pfx,
130 const struct cper_sec_proc_generic *proc)
131{
132 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
133 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
134 proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
135 cper_proc_type_strs[proc->proc_type] : "unknown");
136 if (proc->validation_bits & CPER_PROC_VALID_ISA)
137 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
138 proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
139 cper_proc_isa_strs[proc->proc_isa] : "unknown");
140 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
141 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
142 cper_print_bits(pfx, proc->proc_error_type,
143 cper_proc_error_type_strs,
144 ARRAY_SIZE(cper_proc_error_type_strs));
145 }
146 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
147 printk("%s""operation: %d, %s\n", pfx, proc->operation,
148 proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
149 cper_proc_op_strs[proc->operation] : "unknown");
150 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
151 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
152 cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
153 ARRAY_SIZE(cper_proc_flag_strs));
154 }
155 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
156 printk("%s""level: %d\n", pfx, proc->level);
157 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
158 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
159 if (proc->validation_bits & CPER_PROC_VALID_ID)
160 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
161 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
162 printk("%s""target_address: 0x%016llx\n",
163 pfx, proc->target_addr);
164 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
165 printk("%s""requestor_id: 0x%016llx\n",
166 pfx, proc->requestor_id);
167 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
168 printk("%s""responder_id: 0x%016llx\n",
169 pfx, proc->responder_id);
170 if (proc->validation_bits & CPER_PROC_VALID_IP)
171 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
172}
173
174static const char *cper_mem_err_type_strs[] = {
175 "unknown",
176 "no error",
177 "single-bit ECC",
178 "multi-bit ECC",
179 "single-symbol chipkill ECC",
180 "multi-symbol chipkill ECC",
181 "master abort",
182 "target abort",
183 "parity error",
184 "watchdog timeout",
185 "invalid address",
186 "mirror Broken",
187 "memory sparing",
188 "scrub corrected error",
189 "scrub uncorrected error",
190};
191
192static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
193{
194 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
195 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
196 if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)
197 printk("%s""physical_address: 0x%016llx\n",
198 pfx, mem->physical_addr);
199 if (mem->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
200 printk("%s""physical_address_mask: 0x%016llx\n",
201 pfx, mem->physical_addr_mask);
202 if (mem->validation_bits & CPER_MEM_VALID_NODE)
203 printk("%s""node: %d\n", pfx, mem->node);
204 if (mem->validation_bits & CPER_MEM_VALID_CARD)
205 printk("%s""card: %d\n", pfx, mem->card);
206 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
207 printk("%s""module: %d\n", pfx, mem->module);
208 if (mem->validation_bits & CPER_MEM_VALID_BANK)
209 printk("%s""bank: %d\n", pfx, mem->bank);
210 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
211 printk("%s""device: %d\n", pfx, mem->device);
212 if (mem->validation_bits & CPER_MEM_VALID_ROW)
213 printk("%s""row: %d\n", pfx, mem->row);
214 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
215 printk("%s""column: %d\n", pfx, mem->column);
216 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
217 printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
218 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
219 printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
220 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
221 printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
222 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
223 printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
224 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
225 u8 etype = mem->error_type;
226 printk("%s""error_type: %d, %s\n", pfx, etype,
227 etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
228 cper_mem_err_type_strs[etype] : "unknown");
229 }
230}
231
232static const char *cper_pcie_port_type_strs[] = {
233 "PCIe end point",
234 "legacy PCI end point",
235 "unknown",
236 "unknown",
237 "root port",
238 "upstream switch port",
239 "downstream switch port",
240 "PCIe to PCI/PCI-X bridge",
241 "PCI/PCI-X to PCIe bridge",
242 "root complex integrated endpoint device",
243 "root complex event collector",
244};
245
246static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie)
247{
248 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
249 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
250 pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
251 cper_pcie_port_type_strs[pcie->port_type] : "unknown");
252 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
253 printk("%s""version: %d.%d\n", pfx,
254 pcie->version.major, pcie->version.minor);
255 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
256 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
257 pcie->command, pcie->status);
258 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
259 const __u8 *p;
260 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
261 pcie->device_id.segment, pcie->device_id.bus,
262 pcie->device_id.device, pcie->device_id.function);
263 printk("%s""slot: %d\n", pfx,
264 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
265 printk("%s""secondary_bus: 0x%02x\n", pfx,
266 pcie->device_id.secondary_bus);
267 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
268 pcie->device_id.vendor_id, pcie->device_id.device_id);
269 p = pcie->device_id.class_code;
270 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
271 }
272 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
273 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
274 pcie->serial_number.lower, pcie->serial_number.upper);
275 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
276 printk(
277 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
278 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
279}
280
281static const char *apei_estatus_section_flag_strs[] = {
282 "primary",
283 "containment warning",
284 "reset",
285 "threshold exceeded",
286 "resource not accessible",
287 "latent error",
288};
289
290static void apei_estatus_print_section(
291 const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
292{
293 uuid_le *sec_type = (uuid_le *)gdata->section_type;
294 __u16 severity;
295
296 severity = gdata->error_severity;
297 printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
298 cper_severity_str(severity));
299 printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
300 cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
301 ARRAY_SIZE(apei_estatus_section_flag_strs));
302 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
303 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
304 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
305 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
306
307 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
308 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
309 printk("%s""section_type: general processor error\n", pfx);
310 if (gdata->error_data_length >= sizeof(*proc_err))
311 cper_print_proc_generic(pfx, proc_err);
312 else
313 goto err_section_too_small;
314 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
315 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
316 printk("%s""section_type: memory error\n", pfx);
317 if (gdata->error_data_length >= sizeof(*mem_err))
318 cper_print_mem(pfx, mem_err);
319 else
320 goto err_section_too_small;
321 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
322 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
323 printk("%s""section_type: PCIe error\n", pfx);
324 if (gdata->error_data_length >= sizeof(*pcie))
325 cper_print_pcie(pfx, pcie);
326 else
327 goto err_section_too_small;
328 } else
329 printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
330
331 return;
332
333err_section_too_small:
334 pr_err(FW_WARN "error section length is too small\n");
335}
336
337void apei_estatus_print(const char *pfx,
338 const struct acpi_hest_generic_status *estatus)
339{
340 struct acpi_hest_generic_data *gdata;
341 unsigned int data_len, gedata_len;
342 int sec_no = 0;
343 __u16 severity;
344
345 printk("%s""APEI generic hardware error status\n", pfx);
346 severity = estatus->error_severity;
347 printk("%s""severity: %d, %s\n", pfx, severity,
348 cper_severity_str(severity));
349 data_len = estatus->data_length;
350 gdata = (struct acpi_hest_generic_data *)(estatus + 1);
351 while (data_len > sizeof(*gdata)) {
352 gedata_len = gdata->error_data_length;
353 apei_estatus_print_section(pfx, gdata, sec_no);
354 data_len -= gedata_len + sizeof(*gdata);
355 sec_no++;
356 }
357}
358EXPORT_SYMBOL_GPL(apei_estatus_print);
359
49int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus) 360int apei_estatus_check_header(const struct acpi_hest_generic_status *estatus)
50{ 361{
51 if (estatus->data_length && 362 if (estatus->data_length &&
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0d505e59214d..d1d484d4a06a 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -12,10 +12,6 @@
12 * For more information about Generic Hardware Error Source, please 12 * For more information about Generic Hardware Error Source, please
13 * refer to ACPI Specification version 4.0, section 17.3.2.6 13 * refer to ACPI Specification version 4.0, section 17.3.2.6
14 * 14 *
15 * Now, only SCI notification type and memory errors are
16 * supported. More notification type and hardware error type will be
17 * added later.
18 *
19 * Copyright 2010 Intel Corp. 15 * Copyright 2010 Intel Corp.
20 * Author: Huang Ying <ying.huang@intel.com> 16 * Author: Huang Ying <ying.huang@intel.com>
21 * 17 *
@@ -39,14 +35,18 @@
39#include <linux/acpi.h> 35#include <linux/acpi.h>
40#include <linux/io.h> 36#include <linux/io.h>
41#include <linux/interrupt.h> 37#include <linux/interrupt.h>
38#include <linux/timer.h>
42#include <linux/cper.h> 39#include <linux/cper.h>
43#include <linux/kdebug.h> 40#include <linux/kdebug.h>
44#include <linux/platform_device.h> 41#include <linux/platform_device.h>
45#include <linux/mutex.h> 42#include <linux/mutex.h>
43#include <linux/ratelimit.h>
44#include <linux/vmalloc.h>
46#include <acpi/apei.h> 45#include <acpi/apei.h>
47#include <acpi/atomicio.h> 46#include <acpi/atomicio.h>
48#include <acpi/hed.h> 47#include <acpi/hed.h>
49#include <asm/mce.h> 48#include <asm/mce.h>
49#include <asm/tlbflush.h>
50 50
51#include "apei-internal.h" 51#include "apei-internal.h"
52 52
@@ -55,42 +55,131 @@
55#define GHES_ESTATUS_MAX_SIZE 65536 55#define GHES_ESTATUS_MAX_SIZE 65536
56 56
57/* 57/*
58 * One struct ghes is created for each generic hardware error 58 * One struct ghes is created for each generic hardware error source.
59 * source.
60 *
61 * It provides the context for APEI hardware error timer/IRQ/SCI/NMI 59 * It provides the context for APEI hardware error timer/IRQ/SCI/NMI
62 * handler. Handler for one generic hardware error source is only 60 * handler.
63 * triggered after the previous one is done. So handler can uses
64 * struct ghes without locking.
65 * 61 *
66 * estatus: memory buffer for error status block, allocated during 62 * estatus: memory buffer for error status block, allocated during
67 * HEST parsing. 63 * HEST parsing.
68 */ 64 */
69#define GHES_TO_CLEAR 0x0001 65#define GHES_TO_CLEAR 0x0001
66#define GHES_EXITING 0x0002
70 67
71struct ghes { 68struct ghes {
72 struct acpi_hest_generic *generic; 69 struct acpi_hest_generic *generic;
73 struct acpi_hest_generic_status *estatus; 70 struct acpi_hest_generic_status *estatus;
74 struct list_head list;
75 u64 buffer_paddr; 71 u64 buffer_paddr;
76 unsigned long flags; 72 unsigned long flags;
73 union {
74 struct list_head list;
75 struct timer_list timer;
76 unsigned int irq;
77 };
77}; 78};
78 79
80static int ghes_panic_timeout __read_mostly = 30;
81
79/* 82/*
80 * Error source lists, one list for each notification method. The 83 * All error sources notified with SCI shares one notifier function,
81 * members in lists are struct ghes. 84 * so they need to be linked and checked one by one. This is applied
85 * to NMI too.
82 * 86 *
83 * The list members are only added in HEST parsing and deleted during 87 * RCU is used for these lists, so ghes_list_mutex is only used for
84 * module_exit, that is, single-threaded. So no lock is needed for 88 * list changing, not for traversing.
85 * that.
86 *
87 * But the mutual exclusion is needed between members adding/deleting
88 * and timer/IRQ/SCI/NMI handler, which may traverse the list. RCU is
89 * used for that.
90 */ 89 */
91static LIST_HEAD(ghes_sci); 90static LIST_HEAD(ghes_sci);
91static LIST_HEAD(ghes_nmi);
92static DEFINE_MUTEX(ghes_list_mutex); 92static DEFINE_MUTEX(ghes_list_mutex);
93 93
94/*
95 * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
96 * mutual exclusion.
97 */
98static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
99
100/*
101 * The memory area used to transfer hardware error information from
102 * BIOS to Linux can be determined only in the NMI, IRQ or timer
103 * handler, where the general ioremap cannot be used because those
104 * are atomic contexts.  A special atomic ioremap is implemented here.
105 */
106
107/*
108 * Two virtual pages are used, one for NMI context, the other for
109 * IRQ/PROCESS context
110 */
111#define GHES_IOREMAP_PAGES 2
112#define GHES_IOREMAP_NMI_PAGE(base) (base)
113#define GHES_IOREMAP_IRQ_PAGE(base) ((base) + PAGE_SIZE)
114
115/* virtual memory area for atomic ioremap */
116static struct vm_struct *ghes_ioremap_area;
117/*
118 * These two spinlocks prevent the atomic ioremap virtual memory
119 * area from being mapped simultaneously.
120 */
121static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
122static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
123
124static int ghes_ioremap_init(void)
125{
126 ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
127 VM_IOREMAP, VMALLOC_START, VMALLOC_END);
128 if (!ghes_ioremap_area) {
129 pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n");
130 return -ENOMEM;
131 }
132
133 return 0;
134}
135
136static void ghes_ioremap_exit(void)
137{
138 free_vm_area(ghes_ioremap_area);
139}
140
141static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
142{
143 unsigned long vaddr;
144
145 vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
146 ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
147 pfn << PAGE_SHIFT, PAGE_KERNEL);
148
149 return (void __iomem *)vaddr;
150}
151
152static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
153{
154 unsigned long vaddr;
155
156 vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
157 ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
158 pfn << PAGE_SHIFT, PAGE_KERNEL);
159
160 return (void __iomem *)vaddr;
161}
162
163static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
164{
165 unsigned long vaddr = (unsigned long __force)vaddr_ptr;
166 void *base = ghes_ioremap_area->addr;
167
168 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
169 unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
170 __flush_tlb_one(vaddr);
171}
172
173static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
174{
175 unsigned long vaddr = (unsigned long __force)vaddr_ptr;
176 void *base = ghes_ioremap_area->addr;
177
178 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
179 unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
180 __flush_tlb_one(vaddr);
181}
182
94static struct ghes *ghes_new(struct acpi_hest_generic *generic) 183static struct ghes *ghes_new(struct acpi_hest_generic *generic)
95{ 184{
96 struct ghes *ghes; 185 struct ghes *ghes;
@@ -101,7 +190,6 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
101 if (!ghes) 190 if (!ghes)
102 return ERR_PTR(-ENOMEM); 191 return ERR_PTR(-ENOMEM);
103 ghes->generic = generic; 192 ghes->generic = generic;
104 INIT_LIST_HEAD(&ghes->list);
105 rc = acpi_pre_map_gar(&generic->error_status_address); 193 rc = acpi_pre_map_gar(&generic->error_status_address);
106 if (rc) 194 if (rc)
107 goto err_free; 195 goto err_free;
@@ -158,22 +246,41 @@ static inline int ghes_severity(int severity)
158 } 246 }
159} 247}
160 248
161/* SCI handler run in work queue, so ioremap can be used here */ 249static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
162static int ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, 250 int from_phys)
163 int from_phys)
164{ 251{
165 void *vaddr; 252 void __iomem *vaddr;
166 253 unsigned long flags = 0;
167 vaddr = ioremap_cache(paddr, len); 254 int in_nmi = in_nmi();
168 if (!vaddr) 255 u64 offset;
169 return -ENOMEM; 256 u32 trunk;
170 if (from_phys) 257
171 memcpy(buffer, vaddr, len); 258 while (len > 0) {
172 else 259 offset = paddr - (paddr & PAGE_MASK);
173 memcpy(vaddr, buffer, len); 260 if (in_nmi) {
174 iounmap(vaddr); 261 raw_spin_lock(&ghes_ioremap_lock_nmi);
175 262 vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
176 return 0; 263 } else {
264 spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
265 vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
266 }
267 trunk = PAGE_SIZE - offset;
268 trunk = min(trunk, len);
269 if (from_phys)
270 memcpy_fromio(buffer, vaddr + offset, trunk);
271 else
272 memcpy_toio(vaddr + offset, buffer, trunk);
273 len -= trunk;
274 paddr += trunk;
275 buffer += trunk;
276 if (in_nmi) {
277 ghes_iounmap_nmi(vaddr);
278 raw_spin_unlock(&ghes_ioremap_lock_nmi);
279 } else {
280 ghes_iounmap_irq(vaddr);
281 spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
282 }
283 }
177} 284}
178 285
179static int ghes_read_estatus(struct ghes *ghes, int silent) 286static int ghes_read_estatus(struct ghes *ghes, int silent)
@@ -194,10 +301,8 @@ static int ghes_read_estatus(struct ghes *ghes, int silent)
194 if (!buf_paddr) 301 if (!buf_paddr)
195 return -ENOENT; 302 return -ENOENT;
196 303
197 rc = ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, 304 ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
198 sizeof(*ghes->estatus), 1); 305 sizeof(*ghes->estatus), 1);
199 if (rc)
200 return rc;
201 if (!ghes->estatus->block_status) 306 if (!ghes->estatus->block_status)
202 return -ENOENT; 307 return -ENOENT;
203 308
@@ -212,17 +317,15 @@ static int ghes_read_estatus(struct ghes *ghes, int silent)
212 goto err_read_block; 317 goto err_read_block;
213 if (apei_estatus_check_header(ghes->estatus)) 318 if (apei_estatus_check_header(ghes->estatus))
214 goto err_read_block; 319 goto err_read_block;
215 rc = ghes_copy_tofrom_phys(ghes->estatus + 1, 320 ghes_copy_tofrom_phys(ghes->estatus + 1,
216 buf_paddr + sizeof(*ghes->estatus), 321 buf_paddr + sizeof(*ghes->estatus),
217 len - sizeof(*ghes->estatus), 1); 322 len - sizeof(*ghes->estatus), 1);
218 if (rc)
219 return rc;
220 if (apei_estatus_check(ghes->estatus)) 323 if (apei_estatus_check(ghes->estatus))
221 goto err_read_block; 324 goto err_read_block;
222 rc = 0; 325 rc = 0;
223 326
224err_read_block: 327err_read_block:
225 if (rc && !silent) 328 if (rc && !silent && printk_ratelimit())
226 pr_warning(FW_WARN GHES_PFX 329 pr_warning(FW_WARN GHES_PFX
227 "Failed to read error status block!\n"); 330 "Failed to read error status block!\n");
228 return rc; 331 return rc;
@@ -255,11 +358,26 @@ static void ghes_do_proc(struct ghes *ghes)
255 } 358 }
256#endif 359#endif
257 } 360 }
361}
258 362
259 if (!processed && printk_ratelimit()) 363static void ghes_print_estatus(const char *pfx, struct ghes *ghes)
260 pr_warning(GHES_PFX 364{
261 "Unknown error record from generic hardware error source: %d\n", 365 /* Not more than 2 messages every 5 seconds */
262 ghes->generic->header.source_id); 366 static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2);
367
368 if (pfx == NULL) {
369 if (ghes_severity(ghes->estatus->error_severity) <=
370 GHES_SEV_CORRECTED)
371 pfx = KERN_WARNING HW_ERR;
372 else
373 pfx = KERN_ERR HW_ERR;
374 }
375 if (__ratelimit(&ratelimit)) {
376 printk(
377 "%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
378 pfx, ghes->generic->header.source_id);
379 apei_estatus_print(pfx, ghes->estatus);
380 }
263} 381}
264 382
265static int ghes_proc(struct ghes *ghes) 383static int ghes_proc(struct ghes *ghes)
@@ -269,6 +387,7 @@ static int ghes_proc(struct ghes *ghes)
269 rc = ghes_read_estatus(ghes, 0); 387 rc = ghes_read_estatus(ghes, 0);
270 if (rc) 388 if (rc)
271 goto out; 389 goto out;
390 ghes_print_estatus(NULL, ghes);
272 ghes_do_proc(ghes); 391 ghes_do_proc(ghes);
273 392
274out: 393out:
@@ -276,6 +395,42 @@ out:
276 return 0; 395 return 0;
277} 396}
278 397
398static void ghes_add_timer(struct ghes *ghes)
399{
400 struct acpi_hest_generic *g = ghes->generic;
401 unsigned long expire;
402
403 if (!g->notify.poll_interval) {
404 pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
405 g->header.source_id);
406 return;
407 }
408 expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
409 ghes->timer.expires = round_jiffies_relative(expire);
410 add_timer(&ghes->timer);
411}
412
413static void ghes_poll_func(unsigned long data)
414{
415 struct ghes *ghes = (void *)data;
416
417 ghes_proc(ghes);
418 if (!(ghes->flags & GHES_EXITING))
419 ghes_add_timer(ghes);
420}
421
422static irqreturn_t ghes_irq_func(int irq, void *data)
423{
424 struct ghes *ghes = data;
425 int rc;
426
427 rc = ghes_proc(ghes);
428 if (rc)
429 return IRQ_NONE;
430
431 return IRQ_HANDLED;
432}
433
279static int ghes_notify_sci(struct notifier_block *this, 434static int ghes_notify_sci(struct notifier_block *this,
280 unsigned long event, void *data) 435 unsigned long event, void *data)
281{ 436{
@@ -292,10 +447,63 @@ static int ghes_notify_sci(struct notifier_block *this,
292 return ret; 447 return ret;
293} 448}
294 449
450static int ghes_notify_nmi(struct notifier_block *this,
451 unsigned long cmd, void *data)
452{
453 struct ghes *ghes, *ghes_global = NULL;
454 int sev, sev_global = -1;
455 int ret = NOTIFY_DONE;
456
457 if (cmd != DIE_NMI)
458 return ret;
459
460 raw_spin_lock(&ghes_nmi_lock);
461 list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
462 if (ghes_read_estatus(ghes, 1)) {
463 ghes_clear_estatus(ghes);
464 continue;
465 }
466 sev = ghes_severity(ghes->estatus->error_severity);
467 if (sev > sev_global) {
468 sev_global = sev;
469 ghes_global = ghes;
470 }
471 ret = NOTIFY_STOP;
472 }
473
474 if (ret == NOTIFY_DONE)
475 goto out;
476
477 if (sev_global >= GHES_SEV_PANIC) {
478 oops_begin();
479 ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global);
480 /* reboot to log the error! */
481 if (panic_timeout == 0)
482 panic_timeout = ghes_panic_timeout;
483 panic("Fatal hardware error!");
484 }
485
486 list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
487 if (!(ghes->flags & GHES_TO_CLEAR))
488 continue;
489 /* Do not print estatus because printk is not NMI safe */
490 ghes_do_proc(ghes);
491 ghes_clear_estatus(ghes);
492 }
493
494out:
495 raw_spin_unlock(&ghes_nmi_lock);
496 return ret;
497}
498
295static struct notifier_block ghes_notifier_sci = { 499static struct notifier_block ghes_notifier_sci = {
296 .notifier_call = ghes_notify_sci, 500 .notifier_call = ghes_notify_sci,
297}; 501};
298 502
503static struct notifier_block ghes_notifier_nmi = {
504 .notifier_call = ghes_notify_nmi,
505};
506
299static int __devinit ghes_probe(struct platform_device *ghes_dev) 507static int __devinit ghes_probe(struct platform_device *ghes_dev)
300{ 508{
301 struct acpi_hest_generic *generic; 509 struct acpi_hest_generic *generic;
@@ -306,18 +514,27 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev)
306 if (!generic->enabled) 514 if (!generic->enabled)
307 return -ENODEV; 515 return -ENODEV;
308 516
309 if (generic->error_block_length < 517 switch (generic->notify.type) {
310 sizeof(struct acpi_hest_generic_status)) { 518 case ACPI_HEST_NOTIFY_POLLED:
311 pr_warning(FW_BUG GHES_PFX 519 case ACPI_HEST_NOTIFY_EXTERNAL:
312"Invalid error block length: %u for generic hardware error source: %d\n", 520 case ACPI_HEST_NOTIFY_SCI:
313 generic->error_block_length, 521 case ACPI_HEST_NOTIFY_NMI:
522 break;
523 case ACPI_HEST_NOTIFY_LOCAL:
524 pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
314 generic->header.source_id); 525 generic->header.source_id);
315 goto err; 526 goto err;
527 default:
528 pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
529 generic->notify.type, generic->header.source_id);
530 goto err;
316 } 531 }
317 if (generic->records_to_preallocate == 0) { 532
318 pr_warning(FW_BUG GHES_PFX 533 rc = -EIO;
319"Invalid records to preallocate: %u for generic hardware error source: %d\n", 534 if (generic->error_block_length <
320 generic->records_to_preallocate, 535 sizeof(struct acpi_hest_generic_status)) {
536 pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
537 generic->error_block_length,
321 generic->header.source_id); 538 generic->header.source_id);
322 goto err; 539 goto err;
323 } 540 }
@@ -327,38 +544,43 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev)
327 ghes = NULL; 544 ghes = NULL;
328 goto err; 545 goto err;
329 } 546 }
330 if (generic->notify.type == ACPI_HEST_NOTIFY_SCI) { 547 switch (generic->notify.type) {
548 case ACPI_HEST_NOTIFY_POLLED:
549 ghes->timer.function = ghes_poll_func;
550 ghes->timer.data = (unsigned long)ghes;
551 init_timer_deferrable(&ghes->timer);
552 ghes_add_timer(ghes);
553 break;
554 case ACPI_HEST_NOTIFY_EXTERNAL:
555 /* External interrupt vector is GSI */
556 if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) {
557 pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
558 generic->header.source_id);
559 goto err;
560 }
561 if (request_irq(ghes->irq, ghes_irq_func,
562 0, "GHES IRQ", ghes)) {
563 pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
564 generic->header.source_id);
565 goto err;
566 }
567 break;
568 case ACPI_HEST_NOTIFY_SCI:
331 mutex_lock(&ghes_list_mutex); 569 mutex_lock(&ghes_list_mutex);
332 if (list_empty(&ghes_sci)) 570 if (list_empty(&ghes_sci))
333 register_acpi_hed_notifier(&ghes_notifier_sci); 571 register_acpi_hed_notifier(&ghes_notifier_sci);
334 list_add_rcu(&ghes->list, &ghes_sci); 572 list_add_rcu(&ghes->list, &ghes_sci);
335 mutex_unlock(&ghes_list_mutex); 573 mutex_unlock(&ghes_list_mutex);
336 } else { 574 break;
337 unsigned char *notify = NULL; 575 case ACPI_HEST_NOTIFY_NMI:
338 576 mutex_lock(&ghes_list_mutex);
339 switch (generic->notify.type) { 577 if (list_empty(&ghes_nmi))
340 case ACPI_HEST_NOTIFY_POLLED: 578 register_die_notifier(&ghes_notifier_nmi);
341 notify = "POLL"; 579 list_add_rcu(&ghes->list, &ghes_nmi);
342 break; 580 mutex_unlock(&ghes_list_mutex);
343 case ACPI_HEST_NOTIFY_EXTERNAL: 581 break;
344 case ACPI_HEST_NOTIFY_LOCAL: 582 default:
345 notify = "IRQ"; 583 BUG();
346 break;
347 case ACPI_HEST_NOTIFY_NMI:
348 notify = "NMI";
349 break;
350 }
351 if (notify) {
352 pr_warning(GHES_PFX
353"Generic hardware error source: %d notified via %s is not supported!\n",
354 generic->header.source_id, notify);
355 } else {
356 pr_warning(FW_WARN GHES_PFX
357"Unknown notification type: %u for generic hardware error source: %d\n",
358 generic->notify.type, generic->header.source_id);
359 }
360 rc = -ENODEV;
361 goto err;
362 } 584 }
363 platform_set_drvdata(ghes_dev, ghes); 585 platform_set_drvdata(ghes_dev, ghes);
364 586
@@ -379,7 +601,14 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
379 ghes = platform_get_drvdata(ghes_dev); 601 ghes = platform_get_drvdata(ghes_dev);
380 generic = ghes->generic; 602 generic = ghes->generic;
381 603
604 ghes->flags |= GHES_EXITING;
382 switch (generic->notify.type) { 605 switch (generic->notify.type) {
606 case ACPI_HEST_NOTIFY_POLLED:
607 del_timer_sync(&ghes->timer);
608 break;
609 case ACPI_HEST_NOTIFY_EXTERNAL:
610 free_irq(ghes->irq, ghes);
611 break;
383 case ACPI_HEST_NOTIFY_SCI: 612 case ACPI_HEST_NOTIFY_SCI:
384 mutex_lock(&ghes_list_mutex); 613 mutex_lock(&ghes_list_mutex);
385 list_del_rcu(&ghes->list); 614 list_del_rcu(&ghes->list);
@@ -387,12 +616,23 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
387 unregister_acpi_hed_notifier(&ghes_notifier_sci); 616 unregister_acpi_hed_notifier(&ghes_notifier_sci);
388 mutex_unlock(&ghes_list_mutex); 617 mutex_unlock(&ghes_list_mutex);
389 break; 618 break;
619 case ACPI_HEST_NOTIFY_NMI:
620 mutex_lock(&ghes_list_mutex);
621 list_del_rcu(&ghes->list);
622 if (list_empty(&ghes_nmi))
623 unregister_die_notifier(&ghes_notifier_nmi);
624 mutex_unlock(&ghes_list_mutex);
625 /*
626 * To synchronize with NMI handler, ghes can only be
627 * freed after NMI handler finishes.
628 */
629 synchronize_rcu();
630 break;
390 default: 631 default:
391 BUG(); 632 BUG();
392 break; 633 break;
393 } 634 }
394 635
395 synchronize_rcu();
396 ghes_fini(ghes); 636 ghes_fini(ghes);
397 kfree(ghes); 637 kfree(ghes);
398 638
@@ -412,6 +652,8 @@ static struct platform_driver ghes_platform_driver = {
412 652
413static int __init ghes_init(void) 653static int __init ghes_init(void)
414{ 654{
655 int rc;
656
415 if (acpi_disabled) 657 if (acpi_disabled)
416 return -ENODEV; 658 return -ENODEV;
417 659
@@ -420,12 +662,25 @@ static int __init ghes_init(void)
420 return -EINVAL; 662 return -EINVAL;
421 } 663 }
422 664
423 return platform_driver_register(&ghes_platform_driver); 665 rc = ghes_ioremap_init();
666 if (rc)
667 goto err;
668
669 rc = platform_driver_register(&ghes_platform_driver);
670 if (rc)
671 goto err_ioremap_exit;
672
673 return 0;
674err_ioremap_exit:
675 ghes_ioremap_exit();
676err:
677 return rc;
424} 678}
425 679
426static void __exit ghes_exit(void) 680static void __exit ghes_exit(void)
427{ 681{
428 platform_driver_unregister(&ghes_platform_driver); 682 platform_driver_unregister(&ghes_platform_driver);
683 ghes_ioremap_exit();
429} 684}
430 685
431module_init(ghes_init); 686module_init(ghes_init);