aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHuang Ying <ying.huang@intel.com>2011-07-13 01:14:25 -0400
committerLen Brown <len.brown@intel.com>2011-08-03 11:15:57 -0400
commit67eb2e99076708cc790019a6a08ca3e0ae130a3a (patch)
treedc3863496a4b6c4e30450f1b94d3e1c87b858e7a
parent7f184275aa306046fe7edcbef3229754f0d97402 (diff)
ACPI, APEI, GHES, printk support for recoverable error via NMI
Some APEI GHES recoverable errors are reported via NMI, but printk is not safe in NMI context. To solve this issue, a lock-less memory allocator is used to allocate memory in the NMI handler, the error record is saved into the allocated memory, and the error record is put into a lock-less list. In addition, an irq_work is used to defer the operation from NMI context to IRQ context. The irq_work IRQ handler will remove the nodes from the lock-less list, printk the error records and do some further processing, including recovery operations, then free the memory. Signed-off-by: Huang Ying <ying.huang@intel.com> Signed-off-by: Len Brown <len.brown@intel.com>
-rw-r--r--drivers/acpi/apei/Kconfig2
-rw-r--r--drivers/acpi/apei/ghes.c209
2 files changed, 193 insertions, 18 deletions
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index 3f45dde17aec..35596eaaca17 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -13,6 +13,8 @@ config ACPI_APEI_GHES
13 bool "APEI Generic Hardware Error Source" 13 bool "APEI Generic Hardware Error Source"
14 depends on ACPI_APEI && X86 14 depends on ACPI_APEI && X86
15 select ACPI_HED 15 select ACPI_HED
16 select LLIST
17 select GENERIC_ALLOCATOR
16 help 18 help
17 Generic Hardware Error Source provides a way to report 19 Generic Hardware Error Source provides a way to report
18 platform hardware errors (such as that from chipset). It 20 platform hardware errors (such as that from chipset). It
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index b1390a61cde1..d1a40218e17e 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -12,7 +12,7 @@
12 * For more information about Generic Hardware Error Source, please 12 * For more information about Generic Hardware Error Source, please
13 * refer to ACPI Specification version 4.0, section 17.3.2.6 13 * refer to ACPI Specification version 4.0, section 17.3.2.6
14 * 14 *
15 * Copyright 2010 Intel Corp. 15 * Copyright 2010,2011 Intel Corp.
16 * Author: Huang Ying <ying.huang@intel.com> 16 * Author: Huang Ying <ying.huang@intel.com>
17 * 17 *
18 * This program is free software; you can redistribute it and/or 18 * This program is free software; you can redistribute it and/or
@@ -42,6 +42,9 @@
42#include <linux/mutex.h> 42#include <linux/mutex.h>
43#include <linux/ratelimit.h> 43#include <linux/ratelimit.h>
44#include <linux/vmalloc.h> 44#include <linux/vmalloc.h>
45#include <linux/irq_work.h>
46#include <linux/llist.h>
47#include <linux/genalloc.h>
45#include <acpi/apei.h> 48#include <acpi/apei.h>
46#include <acpi/atomicio.h> 49#include <acpi/atomicio.h>
47#include <acpi/hed.h> 50#include <acpi/hed.h>
@@ -53,6 +56,15 @@
53#define GHES_PFX "GHES: " 56#define GHES_PFX "GHES: "
54 57
55#define GHES_ESTATUS_MAX_SIZE 65536 58#define GHES_ESTATUS_MAX_SIZE 65536
59#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536
60
61#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
62
63#define GHES_ESTATUS_NODE_LEN(estatus_len) \
64 (sizeof(struct ghes_estatus_node) + (estatus_len))
65#define GHES_ESTATUS_FROM_NODE(estatus_node) \
66 ((struct acpi_hest_generic_status *) \
67 ((struct ghes_estatus_node *)(estatus_node) + 1))
56 68
57/* 69/*
58 * One struct ghes is created for each generic hardware error source. 70 * One struct ghes is created for each generic hardware error source.
@@ -77,6 +89,11 @@ struct ghes {
77 }; 89 };
78}; 90};
79 91
/*
 * Header of one error status record saved by the NMI handler for later
 * processing in IRQ context. The copied error status itself is stored
 * immediately after this header (see GHES_ESTATUS_FROM_NODE).
 */
92struct ghes_estatus_node {
93 struct llist_node llnode; /* link in ghes_estatus_llist */
94 struct acpi_hest_generic *generic; /* error source the record came from */
95};
96
80int ghes_disable; 97int ghes_disable;
81module_param_named(disable, ghes_disable, bool, 0); 98module_param_named(disable, ghes_disable, bool, 0);
82 99
@@ -124,6 +141,19 @@ static struct vm_struct *ghes_ioremap_area;
124static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); 141static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
125static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); 142static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
126 143
144/*
145 * printk is not safe in NMI context. So in the NMI handler, we allocate
146 * the required memory from a lock-less memory allocator
147 * (ghes_estatus_pool), save the estatus into it, put it on a lock-less
148 * list (ghes_estatus_llist), then defer the printk to IRQ context via
149 * irq_work (ghes_proc_irq_work). ghes_estatus_pool_size_request records
150 * the pool size required by all NMI error sources.
151 */
152static struct gen_pool *ghes_estatus_pool;
153static unsigned long ghes_estatus_pool_size_request;
154static struct llist_head ghes_estatus_llist;
155static struct irq_work ghes_proc_irq_work;
156
127static int ghes_ioremap_init(void) 157static int ghes_ioremap_init(void)
128{ 158{
129 ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, 159 ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
@@ -183,6 +213,55 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
183 __flush_tlb_one(vaddr); 213 __flush_tlb_one(vaddr);
184} 214}
185 215
216static int ghes_estatus_pool_init(void)
217{
218 ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
219 if (!ghes_estatus_pool)
220 return -ENOMEM;
221 return 0;
222}
223
224static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
225 struct gen_pool_chunk *chunk,
226 void *data)
227{
228 free_page(chunk->start_addr);
229}
230
231static void ghes_estatus_pool_exit(void)
232{
233 gen_pool_for_each_chunk(ghes_estatus_pool,
234 ghes_estatus_pool_free_chunk_page, NULL);
235 gen_pool_destroy(ghes_estatus_pool);
236}
237
238static int ghes_estatus_pool_expand(unsigned long len)
239{
240 unsigned long i, pages, size, addr;
241 int ret;
242
243 ghes_estatus_pool_size_request += PAGE_ALIGN(len);
244 size = gen_pool_size(ghes_estatus_pool);
245 if (size >= ghes_estatus_pool_size_request)
246 return 0;
247 pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
248 for (i = 0; i < pages; i++) {
249 addr = __get_free_page(GFP_KERNEL);
250 if (!addr)
251 return -ENOMEM;
252 ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
253 if (ret)
254 return ret;
255 }
256
257 return 0;
258}
259
260static void ghes_estatus_pool_shrink(unsigned long len)
261{
262 ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
263}
264
186static struct ghes *ghes_new(struct acpi_hest_generic *generic) 265static struct ghes *ghes_new(struct acpi_hest_generic *generic)
187{ 266{
188 struct ghes *ghes; 267 struct ghes *ghes;
@@ -344,13 +423,13 @@ static void ghes_clear_estatus(struct ghes *ghes)
344 ghes->flags &= ~GHES_TO_CLEAR; 423 ghes->flags &= ~GHES_TO_CLEAR;
345} 424}
346 425
347static void ghes_do_proc(struct ghes *ghes) 426static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
348{ 427{
349 int sev, processed = 0; 428 int sev, processed = 0;
350 struct acpi_hest_generic_data *gdata; 429 struct acpi_hest_generic_data *gdata;
351 430
352 sev = ghes_severity(ghes->estatus->error_severity); 431 sev = ghes_severity(estatus->error_severity);
353 apei_estatus_for_each_section(ghes->estatus, gdata) { 432 apei_estatus_for_each_section(estatus, gdata) {
354#ifdef CONFIG_X86_MCE 433#ifdef CONFIG_X86_MCE
355 if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, 434 if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
356 CPER_SEC_PLATFORM_MEM)) { 435 CPER_SEC_PLATFORM_MEM)) {
@@ -363,27 +442,37 @@ static void ghes_do_proc(struct ghes *ghes)
363 } 442 }
364} 443}
365 444
366static void __ghes_print_estatus(const char *pfx, struct ghes *ghes) 445static void __ghes_print_estatus(const char *pfx,
446 const struct acpi_hest_generic *generic,
447 const struct acpi_hest_generic_status *estatus)
367{ 448{
368 if (pfx == NULL) { 449 if (pfx == NULL) {
369 if (ghes_severity(ghes->estatus->error_severity) <= 450 if (ghes_severity(estatus->error_severity) <=
370 GHES_SEV_CORRECTED) 451 GHES_SEV_CORRECTED)
371 pfx = KERN_WARNING HW_ERR; 452 pfx = KERN_WARNING HW_ERR;
372 else 453 else
373 pfx = KERN_ERR HW_ERR; 454 pfx = KERN_ERR HW_ERR;
374 } 455 }
375 printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", 456 printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
376 pfx, ghes->generic->header.source_id); 457 pfx, generic->header.source_id);
377 apei_estatus_print(pfx, ghes->estatus); 458 apei_estatus_print(pfx, estatus);
378} 459}
379 460
380static void ghes_print_estatus(const char *pfx, struct ghes *ghes) 461static void ghes_print_estatus(const char *pfx,
462 const struct acpi_hest_generic *generic,
463 const struct acpi_hest_generic_status *estatus)
381{ 464{
382 /* Not more than 2 messages every 5 seconds */ 465 /* Not more than 2 messages every 5 seconds */
383 static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2); 466 static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
467 static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
468 struct ratelimit_state *ratelimit;
384 469
385 if (__ratelimit(&ratelimit)) 470 if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
386 __ghes_print_estatus(pfx, ghes); 471 ratelimit = &ratelimit_corrected;
472 else
473 ratelimit = &ratelimit_uncorrected;
474 if (__ratelimit(ratelimit))
475 __ghes_print_estatus(pfx, generic, estatus);
387} 476}
388 477
389static int ghes_proc(struct ghes *ghes) 478static int ghes_proc(struct ghes *ghes)
@@ -393,8 +482,8 @@ static int ghes_proc(struct ghes *ghes)
393 rc = ghes_read_estatus(ghes, 0); 482 rc = ghes_read_estatus(ghes, 0);
394 if (rc) 483 if (rc)
395 goto out; 484 goto out;
396 ghes_print_estatus(NULL, ghes); 485 ghes_print_estatus(NULL, ghes->generic, ghes->estatus);
397 ghes_do_proc(ghes); 486 ghes_do_proc(ghes->estatus);
398 487
399out: 488out:
400 ghes_clear_estatus(ghes); 489 ghes_clear_estatus(ghes);
@@ -453,6 +542,40 @@ static int ghes_notify_sci(struct notifier_block *this,
453 return ret; 542 return ret;
454} 543}
455 544
545static void ghes_proc_in_irq(struct irq_work *irq_work)
546{
547 struct llist_node *llnode, *next, *tail = NULL;
548 struct ghes_estatus_node *estatus_node;
549 struct acpi_hest_generic_status *estatus;
550 u32 len, node_len;
551
552 /*
553 * Because the time order of estatus in list is reversed,
554 * revert it back to proper order.
555 */
556 llnode = llist_del_all(&ghes_estatus_llist);
557 while (llnode) {
558 next = llnode->next;
559 llnode->next = tail;
560 tail = llnode;
561 llnode = next;
562 }
563 llnode = tail;
564 while (llnode) {
565 next = llnode->next;
566 estatus_node = llist_entry(llnode, struct ghes_estatus_node,
567 llnode);
568 estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
569 len = apei_estatus_len(estatus);
570 node_len = GHES_ESTATUS_NODE_LEN(len);
571 ghes_do_proc(estatus);
572 ghes_print_estatus(NULL, estatus_node->generic, estatus);
573 gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
574 node_len);
575 llnode = next;
576 }
577}
578
456static int ghes_notify_nmi(struct notifier_block *this, 579static int ghes_notify_nmi(struct notifier_block *this,
457 unsigned long cmd, void *data) 580 unsigned long cmd, void *data)
458{ 581{
@@ -482,7 +605,8 @@ static int ghes_notify_nmi(struct notifier_block *this,
482 605
483 if (sev_global >= GHES_SEV_PANIC) { 606 if (sev_global >= GHES_SEV_PANIC) {
484 oops_begin(); 607 oops_begin();
485 __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global); 608 __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic,
609 ghes_global->estatus);
486 /* reboot to log the error! */ 610 /* reboot to log the error! */
487 if (panic_timeout == 0) 611 if (panic_timeout == 0)
488 panic_timeout = ghes_panic_timeout; 612 panic_timeout = ghes_panic_timeout;
@@ -490,12 +614,31 @@ static int ghes_notify_nmi(struct notifier_block *this,
490 } 614 }
491 615
492 list_for_each_entry_rcu(ghes, &ghes_nmi, list) { 616 list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
617#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
618 u32 len, node_len;
619 struct ghes_estatus_node *estatus_node;
620 struct acpi_hest_generic_status *estatus;
621#endif
493 if (!(ghes->flags & GHES_TO_CLEAR)) 622 if (!(ghes->flags & GHES_TO_CLEAR))
494 continue; 623 continue;
495 /* Do not print estatus because printk is not NMI safe */ 624#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
496 ghes_do_proc(ghes); 625 /* Save estatus for further processing in IRQ context */
626 len = apei_estatus_len(ghes->estatus);
627 node_len = GHES_ESTATUS_NODE_LEN(len);
628 estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
629 node_len);
630 if (estatus_node) {
631 estatus_node->generic = ghes->generic;
632 estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
633 memcpy(estatus, ghes->estatus, len);
634 llist_add(&estatus_node->llnode, &ghes_estatus_llist);
635 }
636#endif
497 ghes_clear_estatus(ghes); 637 ghes_clear_estatus(ghes);
498 } 638 }
639#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
640 irq_work_queue(&ghes_proc_irq_work);
641#endif
499 642
500out: 643out:
501 raw_spin_unlock(&ghes_nmi_lock); 644 raw_spin_unlock(&ghes_nmi_lock);
@@ -510,10 +653,26 @@ static struct notifier_block ghes_notifier_nmi = {
510 .notifier_call = ghes_notify_nmi, 653 .notifier_call = ghes_notify_nmi,
511}; 654};
512 655
656static unsigned long ghes_esource_prealloc_size(
657 const struct acpi_hest_generic *generic)
658{
659 unsigned long block_length, prealloc_records, prealloc_size;
660
661 block_length = min_t(unsigned long, generic->error_block_length,
662 GHES_ESTATUS_MAX_SIZE);
663 prealloc_records = max_t(unsigned long,
664 generic->records_to_preallocate, 1);
665 prealloc_size = min_t(unsigned long, block_length * prealloc_records,
666 GHES_ESOURCE_PREALLOC_MAX_SIZE);
667
668 return prealloc_size;
669}
670
513static int __devinit ghes_probe(struct platform_device *ghes_dev) 671static int __devinit ghes_probe(struct platform_device *ghes_dev)
514{ 672{
515 struct acpi_hest_generic *generic; 673 struct acpi_hest_generic *generic;
516 struct ghes *ghes = NULL; 674 struct ghes *ghes = NULL;
675 unsigned long len;
517 int rc = -EINVAL; 676 int rc = -EINVAL;
518 677
519 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; 678 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
@@ -579,6 +738,8 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev)
579 mutex_unlock(&ghes_list_mutex); 738 mutex_unlock(&ghes_list_mutex);
580 break; 739 break;
581 case ACPI_HEST_NOTIFY_NMI: 740 case ACPI_HEST_NOTIFY_NMI:
741 len = ghes_esource_prealloc_size(generic);
742 ghes_estatus_pool_expand(len);
582 mutex_lock(&ghes_list_mutex); 743 mutex_lock(&ghes_list_mutex);
583 if (list_empty(&ghes_nmi)) 744 if (list_empty(&ghes_nmi))
584 register_die_notifier(&ghes_notifier_nmi); 745 register_die_notifier(&ghes_notifier_nmi);
@@ -603,6 +764,7 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
603{ 764{
604 struct ghes *ghes; 765 struct ghes *ghes;
605 struct acpi_hest_generic *generic; 766 struct acpi_hest_generic *generic;
767 unsigned long len;
606 768
607 ghes = platform_get_drvdata(ghes_dev); 769 ghes = platform_get_drvdata(ghes_dev);
608 generic = ghes->generic; 770 generic = ghes->generic;
@@ -633,6 +795,8 @@ static int __devexit ghes_remove(struct platform_device *ghes_dev)
633 * freed after NMI handler finishes. 795 * freed after NMI handler finishes.
634 */ 796 */
635 synchronize_rcu(); 797 synchronize_rcu();
798 len = ghes_esource_prealloc_size(generic);
799 ghes_estatus_pool_shrink(len);
636 break; 800 break;
637 default: 801 default:
638 BUG(); 802 BUG();
@@ -673,14 +837,20 @@ static int __init ghes_init(void)
673 return -EINVAL; 837 return -EINVAL;
674 } 838 }
675 839
840 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
841
676 rc = ghes_ioremap_init(); 842 rc = ghes_ioremap_init();
677 if (rc) 843 if (rc)
678 goto err; 844 goto err;
679 845
680 rc = platform_driver_register(&ghes_platform_driver); 846 rc = ghes_estatus_pool_init();
681 if (rc) 847 if (rc)
682 goto err_ioremap_exit; 848 goto err_ioremap_exit;
683 849
850 rc = platform_driver_register(&ghes_platform_driver);
851 if (rc)
852 goto err_pool_exit;
853
684 rc = apei_osc_setup(); 854 rc = apei_osc_setup();
685 if (rc == 0 && osc_sb_apei_support_acked) 855 if (rc == 0 && osc_sb_apei_support_acked)
686 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); 856 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
@@ -692,6 +862,8 @@ static int __init ghes_init(void)
692 pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); 862 pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
693 863
694 return 0; 864 return 0;
865err_pool_exit:
866 ghes_estatus_pool_exit();
695err_ioremap_exit: 867err_ioremap_exit:
696 ghes_ioremap_exit(); 868 ghes_ioremap_exit();
697err: 869err:
@@ -701,6 +873,7 @@ err:
701static void __exit ghes_exit(void) 873static void __exit ghes_exit(void)
702{ 874{
703 platform_driver_unregister(&ghes_platform_driver); 875 platform_driver_unregister(&ghes_platform_driver);
876 ghes_estatus_pool_exit();
704 ghes_ioremap_exit(); 877 ghes_ioremap_exit();
705} 878}
706 879