diff options
Diffstat (limited to 'drivers/acpi/apei/ghes.c')
-rw-r--r-- | drivers/acpi/apei/ghes.c | 427 |
1 files changed, 427 insertions, 0 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c new file mode 100644 index 000000000000..fd0cc016a099 --- /dev/null +++ b/drivers/acpi/apei/ghes.c | |||
@@ -0,0 +1,427 @@ | |||
1 | /* | ||
2 | * APEI Generic Hardware Error Source support | ||
3 | * | ||
4 | * Generic Hardware Error Source provides a way to report platform | ||
5 | * hardware errors (such as that from chipset). It works in so called | ||
6 | * "Firmware First" mode, that is, hardware errors are reported to | ||
7 | * firmware firstly, then reported to Linux by firmware. This way, | ||
8 | * some non-standard hardware error registers or non-standard hardware | ||
9 | * link can be checked by firmware to produce more hardware error | ||
10 | * information for Linux. | ||
11 | * | ||
12 | * For more information about Generic Hardware Error Source, please | ||
13 | * refer to ACPI Specification version 4.0, section 17.3.2.6 | ||
14 | * | ||
15 | * Now, only SCI notification type and memory errors are | ||
16 | * supported. More notification type and hardware error type will be | ||
17 | * added later. | ||
18 | * | ||
19 | * Copyright 2010 Intel Corp. | ||
20 | * Author: Huang Ying <ying.huang@intel.com> | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or | ||
23 | * modify it under the terms of the GNU General Public License version | ||
24 | * 2 as published by the Free Software Foundation; | ||
25 | * | ||
26 | * This program is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with this program; if not, write to the Free Software | ||
33 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
34 | */ | ||
35 | |||
36 | #include <linux/kernel.h> | ||
37 | #include <linux/module.h> | ||
38 | #include <linux/init.h> | ||
39 | #include <linux/acpi.h> | ||
40 | #include <linux/io.h> | ||
41 | #include <linux/interrupt.h> | ||
42 | #include <linux/cper.h> | ||
43 | #include <linux/kdebug.h> | ||
44 | #include <acpi/apei.h> | ||
45 | #include <acpi/atomicio.h> | ||
46 | #include <acpi/hed.h> | ||
47 | #include <asm/mce.h> | ||
48 | |||
49 | #include "apei-internal.h" | ||
50 | |||
/* Prefix placed in front of every log message printed by this file. */
#define GHES_PFX		"GHES: "

/*
 * Largest error status block buffer, in bytes, that ghes_new() will
 * allocate; a longer length declared by firmware is clamped to this.
 */
#define GHES_ESTATUS_MAX_SIZE	65536
54 | |||
55 | /* | ||
56 | * One struct ghes is created for each generic hardware error | ||
57 | * source. | ||
58 | * | ||
59 | * It provides the context for APEI hardware error timer/IRQ/SCI/NMI | ||
60 | * handler. Handler for one generic hardware error source is only | ||
61 | * triggered after the previous one is done. So handler can uses | ||
62 | * struct ghes without locking. | ||
63 | * | ||
64 | * estatus: memory buffer for error status block, allocated during | ||
65 | * HEST parsing. | ||
66 | */ | ||
67 | #define GHES_TO_CLEAR 0x0001 | ||
68 | |||
69 | struct ghes { | ||
70 | struct acpi_hest_generic *generic; | ||
71 | struct acpi_hest_generic_status *estatus; | ||
72 | struct list_head list; | ||
73 | u64 buffer_paddr; | ||
74 | unsigned long flags; | ||
75 | }; | ||
76 | |||
/*
 * Error source lists, one list per notification method.  The members
 * of the lists are struct ghes.
 *
 * Members are only added while parsing HEST and only removed during
 * module exit, i.e. from a single thread, so no lock is needed between
 * writers.
 *
 * Mutual exclusion is still needed between adding/deleting members and
 * the timer/IRQ/SCI/NMI handlers, which may be traversing a list at the
 * same time.  RCU is used for that.
 */
static LIST_HEAD(ghes_sci);
90 | |||
91 | static struct ghes *ghes_new(struct acpi_hest_generic *generic) | ||
92 | { | ||
93 | struct ghes *ghes; | ||
94 | unsigned int error_block_length; | ||
95 | int rc; | ||
96 | |||
97 | ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); | ||
98 | if (!ghes) | ||
99 | return ERR_PTR(-ENOMEM); | ||
100 | ghes->generic = generic; | ||
101 | INIT_LIST_HEAD(&ghes->list); | ||
102 | rc = acpi_pre_map_gar(&generic->error_status_address); | ||
103 | if (rc) | ||
104 | goto err_free; | ||
105 | error_block_length = generic->error_block_length; | ||
106 | if (error_block_length > GHES_ESTATUS_MAX_SIZE) { | ||
107 | pr_warning(FW_WARN GHES_PFX | ||
108 | "Error status block length is too long: %u for " | ||
109 | "generic hardware error source: %d.\n", | ||
110 | error_block_length, generic->header.source_id); | ||
111 | error_block_length = GHES_ESTATUS_MAX_SIZE; | ||
112 | } | ||
113 | ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); | ||
114 | if (!ghes->estatus) { | ||
115 | rc = -ENOMEM; | ||
116 | goto err_unmap; | ||
117 | } | ||
118 | |||
119 | return ghes; | ||
120 | |||
121 | err_unmap: | ||
122 | acpi_post_unmap_gar(&generic->error_status_address); | ||
123 | err_free: | ||
124 | kfree(ghes); | ||
125 | return ERR_PTR(rc); | ||
126 | } | ||
127 | |||
128 | static void ghes_fini(struct ghes *ghes) | ||
129 | { | ||
130 | kfree(ghes->estatus); | ||
131 | acpi_post_unmap_gar(&ghes->generic->error_status_address); | ||
132 | } | ||
133 | |||
/*
 * Severity levels used inside this file; ghes_severity() maps CPER
 * severities onto them.
 */
enum {
	GHES_SER_NO		= 0x0,
	GHES_SER_CORRECTED	= 0x1,
	GHES_SER_RECOVERABLE	= 0x2,
	GHES_SER_PANIC		= 0x3,
};
140 | |||
141 | static inline int ghes_severity(int severity) | ||
142 | { | ||
143 | switch (severity) { | ||
144 | case CPER_SER_INFORMATIONAL: | ||
145 | return GHES_SER_NO; | ||
146 | case CPER_SER_CORRECTED: | ||
147 | return GHES_SER_CORRECTED; | ||
148 | case CPER_SER_RECOVERABLE: | ||
149 | return GHES_SER_RECOVERABLE; | ||
150 | case CPER_SER_FATAL: | ||
151 | return GHES_SER_PANIC; | ||
152 | default: | ||
153 | /* Unkown, go panic */ | ||
154 | return GHES_SER_PANIC; | ||
155 | } | ||
156 | } | ||
157 | |||
158 | /* SCI handler run in work queue, so ioremap can be used here */ | ||
159 | static int ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, | ||
160 | int from_phys) | ||
161 | { | ||
162 | void *vaddr; | ||
163 | |||
164 | vaddr = ioremap_cache(paddr, len); | ||
165 | if (!vaddr) | ||
166 | return -ENOMEM; | ||
167 | if (from_phys) | ||
168 | memcpy(buffer, vaddr, len); | ||
169 | else | ||
170 | memcpy(vaddr, buffer, len); | ||
171 | iounmap(vaddr); | ||
172 | |||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | static int ghes_read_estatus(struct ghes *ghes, int silent) | ||
177 | { | ||
178 | struct acpi_hest_generic *g = ghes->generic; | ||
179 | u64 buf_paddr; | ||
180 | u32 len; | ||
181 | int rc; | ||
182 | |||
183 | rc = acpi_atomic_read(&buf_paddr, &g->error_status_address); | ||
184 | if (rc) { | ||
185 | if (!silent && printk_ratelimit()) | ||
186 | pr_warning(FW_WARN GHES_PFX | ||
187 | "Failed to read error status block address for hardware error source: %d.\n", | ||
188 | g->header.source_id); | ||
189 | return -EIO; | ||
190 | } | ||
191 | if (!buf_paddr) | ||
192 | return -ENOENT; | ||
193 | |||
194 | rc = ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, | ||
195 | sizeof(*ghes->estatus), 1); | ||
196 | if (rc) | ||
197 | return rc; | ||
198 | if (!ghes->estatus->block_status) | ||
199 | return -ENOENT; | ||
200 | |||
201 | ghes->buffer_paddr = buf_paddr; | ||
202 | ghes->flags |= GHES_TO_CLEAR; | ||
203 | |||
204 | rc = -EIO; | ||
205 | len = apei_estatus_len(ghes->estatus); | ||
206 | if (len < sizeof(*ghes->estatus)) | ||
207 | goto err_read_block; | ||
208 | if (len > ghes->generic->error_block_length) | ||
209 | goto err_read_block; | ||
210 | if (apei_estatus_check_header(ghes->estatus)) | ||
211 | goto err_read_block; | ||
212 | rc = ghes_copy_tofrom_phys(ghes->estatus + 1, | ||
213 | buf_paddr + sizeof(*ghes->estatus), | ||
214 | len - sizeof(*ghes->estatus), 1); | ||
215 | if (rc) | ||
216 | return rc; | ||
217 | if (apei_estatus_check(ghes->estatus)) | ||
218 | goto err_read_block; | ||
219 | rc = 0; | ||
220 | |||
221 | err_read_block: | ||
222 | if (rc && !silent) | ||
223 | pr_warning(FW_WARN GHES_PFX | ||
224 | "Failed to read error status block!\n"); | ||
225 | return rc; | ||
226 | } | ||
227 | |||
228 | static void ghes_clear_estatus(struct ghes *ghes) | ||
229 | { | ||
230 | ghes->estatus->block_status = 0; | ||
231 | if (!(ghes->flags & GHES_TO_CLEAR)) | ||
232 | return; | ||
233 | ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr, | ||
234 | sizeof(ghes->estatus->block_status), 0); | ||
235 | ghes->flags &= ~GHES_TO_CLEAR; | ||
236 | } | ||
237 | |||
238 | static void ghes_do_proc(struct ghes *ghes) | ||
239 | { | ||
240 | int ser, processed = 0; | ||
241 | struct acpi_hest_generic_data *gdata; | ||
242 | |||
243 | ser = ghes_severity(ghes->estatus->error_severity); | ||
244 | apei_estatus_for_each_section(ghes->estatus, gdata) { | ||
245 | #ifdef CONFIG_X86_MCE | ||
246 | if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, | ||
247 | CPER_SEC_PLATFORM_MEM)) { | ||
248 | apei_mce_report_mem_error( | ||
249 | ser == GHES_SER_CORRECTED, | ||
250 | (struct cper_sec_mem_err *)(gdata+1)); | ||
251 | processed = 1; | ||
252 | } | ||
253 | #endif | ||
254 | } | ||
255 | |||
256 | if (!processed && printk_ratelimit()) | ||
257 | pr_warning(GHES_PFX | ||
258 | "Unknown error record from generic hardware error source: %d\n", | ||
259 | ghes->generic->header.source_id); | ||
260 | } | ||
261 | |||
/*
 * Read, process and acknowledge the pending error status of @ghes.
 *
 * ghes_clear_estatus() is called whether or not the read succeeded, so
 * a block that was seen but failed validation is still acknowledged.
 *
 * Returns 0 when an error status block was read and processed,
 * otherwise the error from ghes_read_estatus().  Propagating the error
 * lets ghes_notify_sci() tell whether this source handled anything;
 * previously 0 was returned unconditionally.
 */
static int ghes_proc(struct ghes *ghes)
{
	int rc;

	rc = ghes_read_estatus(ghes, 0);
	if (!rc)
		ghes_do_proc(ghes);

	ghes_clear_estatus(ghes);
	return rc;
}
275 | |||
276 | static int ghes_notify_sci(struct notifier_block *this, | ||
277 | unsigned long event, void *data) | ||
278 | { | ||
279 | struct ghes *ghes; | ||
280 | int ret = NOTIFY_DONE; | ||
281 | |||
282 | rcu_read_lock(); | ||
283 | list_for_each_entry_rcu(ghes, &ghes_sci, list) { | ||
284 | if (!ghes_proc(ghes)) | ||
285 | ret = NOTIFY_OK; | ||
286 | } | ||
287 | rcu_read_unlock(); | ||
288 | |||
289 | return ret; | ||
290 | } | ||
291 | |||
292 | static struct notifier_block ghes_notifier_sci = { | ||
293 | .notifier_call = ghes_notify_sci, | ||
294 | }; | ||
295 | |||
296 | static int hest_ghes_parse(struct acpi_hest_header *hest_hdr, void *data) | ||
297 | { | ||
298 | struct acpi_hest_generic *generic; | ||
299 | struct ghes *ghes = NULL; | ||
300 | int rc = 0; | ||
301 | |||
302 | if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR) | ||
303 | return 0; | ||
304 | |||
305 | generic = (struct acpi_hest_generic *)hest_hdr; | ||
306 | if (!generic->enabled) | ||
307 | return 0; | ||
308 | |||
309 | if (generic->error_block_length < | ||
310 | sizeof(struct acpi_hest_generic_status)) { | ||
311 | pr_warning(FW_BUG GHES_PFX | ||
312 | "Invalid error block length: %u for generic hardware error source: %d\n", | ||
313 | generic->error_block_length, | ||
314 | generic->header.source_id); | ||
315 | goto err; | ||
316 | } | ||
317 | if (generic->records_to_preallocate == 0) { | ||
318 | pr_warning(FW_BUG GHES_PFX | ||
319 | "Invalid records to preallocate: %u for generic hardware error source: %d\n", | ||
320 | generic->records_to_preallocate, | ||
321 | generic->header.source_id); | ||
322 | goto err; | ||
323 | } | ||
324 | ghes = ghes_new(generic); | ||
325 | if (IS_ERR(ghes)) { | ||
326 | rc = PTR_ERR(ghes); | ||
327 | ghes = NULL; | ||
328 | goto err; | ||
329 | } | ||
330 | switch (generic->notify.type) { | ||
331 | case ACPI_HEST_NOTIFY_POLLED: | ||
332 | pr_warning(GHES_PFX | ||
333 | "Generic hardware error source: %d notified via POLL is not supported!\n", | ||
334 | generic->header.source_id); | ||
335 | break; | ||
336 | case ACPI_HEST_NOTIFY_EXTERNAL: | ||
337 | case ACPI_HEST_NOTIFY_LOCAL: | ||
338 | pr_warning(GHES_PFX | ||
339 | "Generic hardware error source: %d notified via IRQ is not supported!\n", | ||
340 | generic->header.source_id); | ||
341 | break; | ||
342 | case ACPI_HEST_NOTIFY_SCI: | ||
343 | if (list_empty(&ghes_sci)) | ||
344 | register_acpi_hed_notifier(&ghes_notifier_sci); | ||
345 | list_add_rcu(&ghes->list, &ghes_sci); | ||
346 | break; | ||
347 | case ACPI_HEST_NOTIFY_NMI: | ||
348 | pr_warning(GHES_PFX | ||
349 | "Generic hardware error source: %d notified via NMI is not supported!\n", | ||
350 | generic->header.source_id); | ||
351 | break; | ||
352 | default: | ||
353 | pr_warning(FW_WARN GHES_PFX | ||
354 | "Unknown notification type: %u for generic hardware error source: %d\n", | ||
355 | generic->notify.type, generic->header.source_id); | ||
356 | break; | ||
357 | } | ||
358 | |||
359 | return 0; | ||
360 | err: | ||
361 | if (ghes) | ||
362 | ghes_fini(ghes); | ||
363 | return rc; | ||
364 | } | ||
365 | |||
366 | static void ghes_cleanup(void) | ||
367 | { | ||
368 | struct ghes *ghes, *nghes; | ||
369 | |||
370 | if (!list_empty(&ghes_sci)) | ||
371 | unregister_acpi_hed_notifier(&ghes_notifier_sci); | ||
372 | |||
373 | synchronize_rcu(); | ||
374 | |||
375 | list_for_each_entry_safe(ghes, nghes, &ghes_sci, list) { | ||
376 | list_del(&ghes->list); | ||
377 | ghes_fini(ghes); | ||
378 | kfree(ghes); | ||
379 | } | ||
380 | } | ||
381 | |||
382 | static int __init ghes_init(void) | ||
383 | { | ||
384 | int rc; | ||
385 | |||
386 | if (acpi_disabled) | ||
387 | return -ENODEV; | ||
388 | |||
389 | if (hest_disable) { | ||
390 | pr_info(GHES_PFX "HEST is not enabled!\n"); | ||
391 | return -EINVAL; | ||
392 | } | ||
393 | |||
394 | rc = apei_hest_parse(hest_ghes_parse, NULL); | ||
395 | if (rc) { | ||
396 | pr_err(GHES_PFX | ||
397 | "Error during parsing HEST generic hardware error sources.\n"); | ||
398 | goto err_cleanup; | ||
399 | } | ||
400 | |||
401 | if (list_empty(&ghes_sci)) { | ||
402 | pr_info(GHES_PFX | ||
403 | "No functional generic hardware error sources.\n"); | ||
404 | rc = -ENODEV; | ||
405 | goto err_cleanup; | ||
406 | } | ||
407 | |||
408 | pr_info(GHES_PFX | ||
409 | "Generic Hardware Error Source support is initialized.\n"); | ||
410 | |||
411 | return 0; | ||
412 | err_cleanup: | ||
413 | ghes_cleanup(); | ||
414 | return rc; | ||
415 | } | ||
416 | |||
417 | static void __exit ghes_exit(void) | ||
418 | { | ||
419 | ghes_cleanup(); | ||
420 | } | ||
421 | |||
422 | module_init(ghes_init); | ||
423 | module_exit(ghes_exit); | ||
424 | |||
425 | MODULE_AUTHOR("Huang Ying"); | ||
426 | MODULE_DESCRIPTION("APEI Generic Hardware Error Source support"); | ||
427 | MODULE_LICENSE("GPL"); | ||