aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/acpi/apei
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/acpi/apei')
-rw-r--r--drivers/acpi/apei/Kconfig2
-rw-r--r--drivers/acpi/apei/ghes.c209
2 files changed, 193 insertions, 18 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index 3f45dde17aec..35596eaaca17 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -13,6 +13,8 @@ config ACPI_APEI_GHES
13 bool "APEI Generic Hardware Error Source" 13 bool "APEI Generic Hardware Error Source"
14 depends on ACPI_APEI && X86 14 depends on ACPI_APEI && X86
15 select ACPI_HED 15 select ACPI_HED
16 select LLIST
17 select GENERIC_ALLOCATOR
16 help 18 help
17 Generic Hardware Error Source provides a way to report 19 Generic Hardware Error Source provides a way to report
18 platform hardware errors (such as that from chipset). It 20 platform hardware errors (such as that from chipset). It
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index b1390a61cde1..d1a40218e17e 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -12,7 +12,7 @@
12 * For more information about Generic Hardware Error Source, please 12 * For more information about Generic Hardware Error Source, please
13 * refer to ACPI Specification version 4.0, section 17.3.2.6 13 * refer to ACPI Specification version 4.0, section 17.3.2.6
14 * 14 *
15 * Copyright 2010 Intel Corp. 15 * Copyright 2010,2011 Intel Corp.
16 * Author: Huang Ying <ying.huang@intel.com> 16 * Author: Huang Ying <ying.huang@intel.com>
17 * 17 *
18 * This program is free software; you can redistribute it and/or 18 * This program is free software; you can redistribute it and/or
@@ -42,6 +42,9 @@
42#include <linux/mutex.h> 42#include <linux/mutex.h>
43#include <linux/ratelimit.h> 43#include <linux/ratelimit.h>
44#include <linux/vmalloc.h> 44#include <linux/vmalloc.h>
45#include <linux/irq_work.h>
46#include <linux/llist.h>
47#include <linux/genalloc.h>
45#include <acpi/apei.h> 48#include <acpi/apei.h>
46#include <acpi/atomicio.h> 49#include <acpi/atomicio.h>
47#include <acpi/hed.h> 50#include <acpi/hed.h>
@@ -53,6 +56,15 @@
53#define GHES_PFX "GHES: " 56#define GHES_PFX "GHES: "
54 57
55#define GHES_ESTATUS_MAX_SIZE 65536 58#define GHES_ESTATUS_MAX_SIZE 65536
/* Upper bound on the pool space requested on behalf of one error source. */
#define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536

/* Minimum allocation order passed to gen_pool_create() for the estatus pool. */
#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER	3

/* Bytes needed for one node header followed by estatus_len bytes of estatus. */
#define GHES_ESTATUS_NODE_LEN(estatus_len)				\
	((estatus_len) + sizeof(struct ghes_estatus_node))
/* The saved estatus starts immediately after the node header. */
#define GHES_ESTATUS_FROM_NODE(estatus_node)				\
	((struct acpi_hest_generic_status *)				\
	 ((struct ghes_estatus_node *)(estatus_node) + 1))
56 68
57/* 69/*
58 * One struct ghes is created for each generic hardware error source. 70 * One struct ghes is created for each generic hardware error source.
@@ -77,6 +89,11 @@ struct ghes {
77 }; 89 };
78}; 90};
79 91
92struct ghes_estatus_node {
93 struct llist_node llnode;
94 struct acpi_hest_generic *generic;
95};
96
80int ghes_disable; 97int ghes_disable;
81module_param_named(disable, ghes_disable, bool, 0); 98module_param_named(disable, ghes_disable, bool, 0);
82 99
@@ -124,6 +141,19 @@ static struct vm_struct *ghes_ioremap_area;
124static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); 141static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
125static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); 142static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
126 143
144/*
145 * printk is not safe in NMI context. So in NMI handler, we allocate
146 * required memory from lock-less memory allocator
147 * (ghes_estatus_pool), save estatus into it, put them into lock-less
148 * list (ghes_estatus_llist), then delay printk into IRQ context via
149 * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record
150 * required pool size by all NMI error source.
151 */
152static struct gen_pool *ghes_estatus_pool;
153static unsigned long ghes_estatus_pool_size_request;
154static struct llist_head ghes_estatus_llist;
155static struct irq_work ghes_proc_irq_work;
156
127static int ghes_ioremap_init(void) 157static int ghes_ioremap_init(void)
128{ 158{
129 ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, 159 ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
@@ -183,6 +213,55 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
183 __flush_tlb_one(vaddr); 213 __flush_tlb_one(vaddr);
184} 214}
185 215
216static int ghes_estatus_pool_init(void)
217{
218 ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
219 if (!ghes_estatus_pool)
220 return -ENOMEM;
221 return 0;
222}
223
224static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
225 struct gen_pool_chunk *chunk,
226 void *data)
227{
228 free_page(chunk->start_addr);
229}
230
231static void ghes_estatus_pool_exit(void)
232{
233 gen_pool_for_each_chunk(ghes_estatus_pool,
234 ghes_estatus_pool_free_chunk_page, NULL);
235 gen_pool_destroy(ghes_estatus_pool);
236}
237
238static int ghes_estatus_pool_expand(unsigned long len)
239{
240 unsigned long i, pages, size, addr;
241 int ret;
242
243 ghes_estatus_pool_size_request += PAGE_ALIGN(len);
244 size = gen_pool_size(ghes_estatus_pool);
245 if (size >= ghes_estatus_pool_size_request)
246 return 0;
247 pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
248 for (i = 0; i < pages; i++) {
249 addr = __get_free_page(GFP_KERNEL);
250 if (!addr)
251 return -ENOMEM;
252 ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
253 if (ret)
254 return ret;
255 }
256
257 return 0;
258}
259
260static void ghes_estatus_pool_shrink(unsigned long len)
261{
262 ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
263}
264
186static struct ghes *ghes_new(struct acpi_hest_generic *generic) 265static struct ghes *ghes_new(struct acpi_hest_generic *generic)
187{ 266{
188 struct ghes *ghes; 267 struct ghes *ghes;
@@ -344,13 +423,13 @@ static void ghes_clear_estatus(struct ghes *ghes)
344 ghes->flags &= ~GHES_TO_CLEAR; 423 ghes->flags &= ~GHES_TO_CLEAR;
345} 424}
346 425
347static void ghes_do_proc(struct ghes *ghes) 426static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
348{ 427{
349 int sev, processed = 0; 428 int sev, processed = 0;
350 struct acpi_hest_generic_data *gdata; 429 struct acpi_hest_generic_data *gdata;
351 430
352 sev = ghes_severity(ghes->estatus->error_severity); 431 sev = ghes_severity(estatus->error_severity);
353 apei_estatus_for_each_section(ghes->estatus, gdata) { 432 apei_estatus_for_each_section(estatus, gdata) {
354#ifdef CONFIG_X86_MCE 433#ifdef CONFIG_X86_MCE
355 if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, 434 if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
356 CPER_SEC_PLATFORM_MEM)) { 435 CPER_SEC_PLATFORM_MEM)) {
@@ -363,27 +442,37 @@ static void ghes_do_proc(struct ghes *ghes)
363 } 442 }
364} 443}
365 444
366static void __ghes_print_estatus(const char *pfx, struct ghes *ghes) 445static void __ghes_print_estatus(const char *pfx,
446 const struct acpi_hest_generic *generic,
447 const struct acpi_hest_generic_status *estatus)
367{ 448{
368 if (pfx == NULL) { 449 if (pfx == NULL) {
369 if (ghes_severity(ghes->estatus->error_severity) <= 450 if (ghes_severity(estatus->error_severity) <=
370 GHES_SEV_CORRECTED) 451 GHES_SEV_CORRECTED)
371 pfx = KERN_WARNING HW_ERR; 452 pfx = KERN_WARNING HW_ERR;
372 else 453 else
373 pfx = KERN_ERR HW_ERR; 454 pfx = KERN_ERR HW_ERR;
374 } 455 }
375 printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", 456 printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
376 pfx, ghes->generic->header.source_id); 457 pfx, generic->header.source_id);
377 apei_estatus_print(pfx, ghes->estatus); 458 apei_estatus_print(pfx, estatus);
378} 459}
379 460
380static void ghes_print_estatus(const char *pfx, struct ghes *ghes) 461static void ghes_print_estatus(const char *pfx,
462 const struct acpi_hest_generic *generic,
463 const struct acpi_hest_generic_status *estatus)
381{ 464{
382 /* Not more than 2 messages every 5 seconds */ 465 /* Not more than 2 messages every 5 seconds */
383 static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2); 466 static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
467 static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
468 struct ratelimit_state *ratelimit;
384 469
385 if (__ratelimit(&ratelimit)) 470 if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
386 __ghes_print_estatus(pfx, ghes); 471 ratelimit = &ratelimit_corrected;
472 else
473 ratelimit = &ratelimit_uncorrected;
474 if (__ratelimit(ratelimit))
475 __ghes_print_estatus(pfx, generic, estatus);
387} 476}
388 477
389static int ghes_proc(struct ghes *ghes) 478static int ghes_proc(struct ghes *ghes)
@@ -393,8 +482,8 @@ static int ghes_proc(struct ghes *ghes)
393 rc = ghes_read_estatus(ghes, 0); 482 rc = ghes_read_estatus(ghes, 0);
394 if (rc) 483 if (rc)
395 goto out; 484 goto out;
396 ghes_print_estatus(NULL, ghes); 485 ghes_print_estatus(NULL, ghes->generic, ghes->estatus);
397 ghes_do_proc(ghes); 486 ghes_do_proc(ghes->estatus);
398 487
399out: 488out:
400 ghes_clear_estatus(ghes); 489 ghes_clear_estatus(ghes);
@@ -453,6 +542,40 @@ static int ghes_notify_sci(struct notifier_block *this,
453 return ret; 542 return ret;
454} 543}
455 544
545static void ghes_proc_in_irq(struct irq_work *irq_work)
546{
547 struct llist_node *llnode, *next, *tail = NULL;
548 struct ghes_estatus_node *estatus_node;
549 struct acpi_hest_generic_status *estatus;
550 u32 len, node_len;
551
552 /*
553 * Because the time order of estatus in list is reversed,
554 * revert it back to proper order.
555 */
556 llnode = llist_del_all(&ghes_estatus_llist);
557 while (llnode) {
558 next = llnode->next;
559 llnode->next = tail;
560 tail = llnode;
561 llnode = next;
562 }
563 llnode = tail;
564 while (llnode) {
565 next = llnode->next;
566 estatus_node = llist_entry(llnode, struct ghes_estatus_node,
567 llnode);
568 estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
569 len = apei_estatus_len(estatus);
570 node_len = GHES_ESTATUS_NODE_LEN(len);
571 ghes_do_proc(estatus);
572 ghes_print_estatus(NULL, estatus_node->generic, estatus);
573 gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
574 node_len);
575 llnode = next;
576 }
577}
578
456static int ghes_notify_nmi(struct notifier_block *this, 579static int ghes_notify_nmi(struct notifier_block *this,
457 unsigned long cmd, void *data) 580 unsigned long cmd, void *data)
458{ 581{
@@ -482,7 +605,8 @@ static int ghes_notify_nmi(struct notifier_block *this,
482 605
483 if (sev_global >= GHES_SEV_PANIC) { 606 if (sev_global >= GHES_SEV_PANIC) {
484 oops_begin(); 607 oops_begin();
485 __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); 608 __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic,
609 ghes_global->estatus);
486 /* reboot to log the error! */ 610 /* reboot to log the error! */
487 if (panic_timeout == 0) 611 if (panic_timeout == 0)
488 panic_timeout = ghes_panic_timeout; 612 panic_timeout = ghes_panic_timeout;
@@ -490,12 +614,31 @@ static int ghes_notify_nmi(struct notifier_block *this,
490 } 614 }
491 615
492 list_for_each_entry_rcu(ghes, &ghes_nmi, list) { 616 list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
617#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
618 u32 len, node_len;
619 struct ghes_estatus_node *estatus_node;
620 struct acpi_hest_generic_status *estatus;
621#endif
493 if (!(ghes->flags & GHES_TO_CLEAR)) 622 if (!(ghes->flags & GHES_TO_CLEAR))
494 continue; 623 continue;
495 /* Do not print estatus because printk is not NMI safe */ 624#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
496 ghes_do_proc(ghes); 625 /* Save estatus for further processing in IRQ context */
626 len = apei_estatus_len(ghes->estatus);
627 node_len = GHES_ESTATUS_NODE_LEN(len);
628 estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
629 node_len);
630 if (estatus_node) {
631 estatus_node->generic = ghes->generic;
632 estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
633 memcpy(estatus, ghes->estatus, len);
634 llist_add(&estatus_node->llnode, &ghes_estatus_llist);
635 }
636#endif
497 ghes_clear_estatus(ghes); 637 ghes_clear_estatus(ghes);
498 } 638 }
639#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
640 irq_work_queue(&ghes_proc_irq_work);
641#endif
499 642
500out: 643out:
501 raw_spin_unlock(&ghes_nmi_lock); 644 raw_spin_unlock(&ghes_nmi_lock);
@@ -510,10 +653,26 @@ static struct notifier_block ghes_notifier_nmi = {
510 .notifier_call = ghes_notify_nmi, 653 .notifier_call = ghes_notify_nmi,
511}; 654};
512 655
656static unsigned long ghes_esource_prealloc_size(
657 const struct acpi_hest_generic *generic)
658{
659 unsigned long block_length, prealloc_records, prealloc_size;
660
661 block_length = min_t(unsigned long, generic->error_block_length,
662 GHES_ESTATUS_MAX_SIZE);
663 prealloc_records = max_t(unsigned long,
664 generic->records_to_preallocate, 1);
665 prealloc_size = min_t(unsigned long, block_length * prealloc_records,
666 GHES_ESOURCE_PREALLOC_MAX_SIZE);
667
668 return prealloc_size;
669}
670
513static int __devinit ghes_probe(struct platform_device *ghes_dev) 671static int __devinit ghes_probe(struct platform_device *ghes_dev)
514{ 672{
515 struct acpi_hest_generic *generic; 673 struct acpi_hest_generic *generic;
516 struct ghes *ghes = NULL; 674 struct ghes *ghes = NULL;
675 unsigned long len;
517 int rc = -EINVAL; 676 int rc = -EINVAL;
518 677
519 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; 678 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
@@ -579,6 +738,8 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev)
579 mutex_unlock(&ghes_list_mutex); 738 mutex_unlock(&ghes_list_mutex);
580 break; 739 break;
581 case ACPI_HEST_NOTIFY_NMI: 740 case ACPI_HEST_NOTIFY_NMI:
741 len = ghes_esource_prealloc_size(generic);
742 ghes_estatus_pool_expand(len);
582 mutex_lock(&ghes_list_mutex); 743 mutex_lock(&ghes_list_mutex);
583 if (list_empty(&ghes_nmi)) 744 if (list_empty(&ghes_nmi))
584 register_die_notifier(&ghes_notifier_nmi); 745 register_die_notifier(&ghes_notifier_nmi);
@@ -603,6 +764,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
603{ 764{
604 struct ghes *ghes; 765 struct ghes *ghes;
605 struct acpi_hest_generic *generic; 766 struct acpi_hest_generic *generic;
767 unsigned long len;
606 768
607 ghes = platform_get_drvdata(ghes_dev); 769 ghes = platform_get_drvdata(ghes_dev);
608 generic = ghes->generic; 770 generic = ghes->generic;
@@ -633,6 +795,8 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
633 * freed after NMI handler finishes. 795 * freed after NMI handler finishes.
634 */ 796 */
635 synchronize_rcu(); 797 synchronize_rcu();
798 len = ghes_esource_prealloc_size(generic);
799 ghes_estatus_pool_shrink(len);
636 break; 800 break;
637 default: 801 default:
638 BUG(); 802 BUG();
@@ -673,14 +837,20 @@ static int __init ghes_init(void)
673 return -EINVAL; 837 return -EINVAL;
674 } 838 }
675 839
840 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
841
676 rc = ghes_ioremap_init(); 842 rc = ghes_ioremap_init();
677 if (rc) 843 if (rc)
678 goto err; 844 goto err;
679 845
680 rc = platform_driver_register(&ghes_platform_driver); 846 rc = ghes_estatus_pool_init();
681 if (rc) 847 if (rc)
682 goto err_ioremap_exit; 848 goto err_ioremap_exit;
683 849
850 rc = platform_driver_register(&ghes_platform_driver);
851 if (rc)
852 goto err_pool_exit;
853
684 rc = apei_osc_setup(); 854 rc = apei_osc_setup();
685 if (rc == 0 && osc_sb_apei_support_acked) 855 if (rc == 0 && osc_sb_apei_support_acked)
686 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); 856 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
@@ -692,6 +862,8 @@ static int __init ghes_init(void)
692 pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); 862 pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
693 863
694 return 0; 864 return 0;
865err_pool_exit:
866 ghes_estatus_pool_exit();
695err_ioremap_exit: 867err_ioremap_exit:
696 ghes_ioremap_exit(); 868 ghes_ioremap_exit();
697err: 869err:
@@ -701,6 +873,7 @@ err:
701static void __exit ghes_exit(void) 873static void __exit ghes_exit(void)
702{ 874{
703 platform_driver_unregister(&ghes_platform_driver); 875 platform_driver_unregister(&ghes_platform_driver);
876 ghes_estatus_pool_exit();
704 ghes_ioremap_exit(); 877 ghes_ioremap_exit();
705} 878}
706 879