about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Len Brown <len.brown@intel.com> 2011-03-22 01:41:47 -0400
committer Len Brown <len.brown@intel.com> 2011-03-22 01:41:47 -0400
commit 25076246e80c0c48cc4c9115335b83343b9dc727 (patch)
tree c7b462c6b4f67227722135a7a419ad110a6fd93e
parent 05534c9ffc9d5d950b14de8ba49a7609dc59b0b8 (diff)
parent c413d7682020a127f54744a1b30f597692aea1fd (diff)
Merge branch 'apei-release' into release
-rw-r--r-- Documentation/acpi/apei/output_format.txt 25
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce-apei.c 42
-rw-r--r-- drivers/acpi/apei/Kconfig 7
-rw-r--r-- drivers/acpi/apei/cper.c 18
-rw-r--r-- drivers/acpi/apei/erst-dbg.c 24
-rw-r--r-- drivers/acpi/apei/erst.c 235
-rw-r--r-- drivers/pci/pcie/aer/aerdrv.h 9
-rw-r--r-- drivers/pci/pcie/aer/aerdrv_errprint.c 182
-rw-r--r-- include/acpi/apei.h 5
-rw-r--r-- include/linux/aer.h 24
-rw-r--r-- include/linux/cper.h 2
11 files changed, 421 insertions, 152 deletions
diff --git a/Documentation/acpi/apei/output_format.txt b/Documentation/acpi/apei/output_format.txt
index 9146952c612a..0c49c197c47a 100644
--- a/Documentation/acpi/apei/output_format.txt
+++ b/Documentation/acpi/apei/output_format.txt
@@ -92,6 +92,11 @@ vendor_id: <integer>, device_id: <integer>
92class_code: <integer>] 92class_code: <integer>]
93[serial number: <integer>, <integer>] 93[serial number: <integer>, <integer>]
94[bridge: secondary_status: <integer>, control: <integer>] 94[bridge: secondary_status: <integer>, control: <integer>]
95[aer_status: <integer>, aer_mask: <integer>
96<aer status string>
97[aer_uncor_severity: <integer>]
98aer_layer=<aer layer string>, aer_agent=<aer agent string>
99aer_tlp_header: <integer> <integer> <integer> <integer>]
95 100
96<pcie port type string>* := PCIe end point | legacy PCI end point | \ 101<pcie port type string>* := PCIe end point | legacy PCI end point | \
97unknown | unknown | root port | upstream switch port | \ 102unknown | unknown | root port | upstream switch port | \
@@ -99,6 +104,26 @@ downstream switch port | PCIe to PCI/PCI-X bridge | \
99PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \ 104PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \
100root complex event collector 105root complex event collector
101 106
107if section severity is fatal or recoverable
108<aer status string># :=
109unknown | unknown | unknown | unknown | Data Link Protocol | \
110unknown | unknown | unknown | unknown | unknown | unknown | unknown | \
111Poisoned TLP | Flow Control Protocol | Completion Timeout | \
112Completer Abort | Unexpected Completion | Receiver Overflow | \
113Malformed TLP | ECRC | Unsupported Request
114else
115<aer status string># :=
116Receiver Error | unknown | unknown | unknown | unknown | unknown | \
117Bad TLP | Bad DLLP | RELAY_NUM Rollover | unknown | unknown | unknown | \
118Replay Timer Timeout | Advisory Non-Fatal
119fi
120
121<aer layer string> :=
122Physical Layer | Data Link Layer | Transaction Layer
123
124<aer agent string> :=
125Receiver ID | Requester ID | Completer ID | Transmitter ID
126
102Where, [] designate corresponding content is optional 127Where, [] designate corresponding content is optional
103 128
104All <field string> description with * has the following format: 129All <field string> description with * has the following format:
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index 8209472b27a5..83930deec3c6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -106,24 +106,34 @@ int apei_write_mce(struct mce *m)
106ssize_t apei_read_mce(struct mce *m, u64 *record_id) 106ssize_t apei_read_mce(struct mce *m, u64 *record_id)
107{ 107{
108 struct cper_mce_record rcd; 108 struct cper_mce_record rcd;
109 ssize_t len; 109 int rc, pos;
110 110
111 len = erst_read_next(&rcd.hdr, sizeof(rcd)); 111 rc = erst_get_record_id_begin(&pos);
112 if (len <= 0) 112 if (rc)
113 return len; 113 return rc;
114 /* Can not skip other records in storage via ERST unless clear them */ 114retry:
115 else if (len != sizeof(rcd) || 115 rc = erst_get_record_id_next(&pos, record_id);
116 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { 116 if (rc)
117 if (printk_ratelimit()) 117 goto out;
118 pr_warning( 118 /* no more record */
119 "MCE-APEI: Can not skip the unknown record in ERST"); 119 if (*record_id == APEI_ERST_INVALID_RECORD_ID)
120 return -EIO; 120 goto out;
121 } 121 rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
122 122 /* someone else has cleared the record, try next one */
123 if (rc == -ENOENT)
124 goto retry;
125 else if (rc < 0)
126 goto out;
127 /* try to skip other type records in storage */
128 else if (rc != sizeof(rcd) ||
129 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
130 goto retry;
123 memcpy(m, &rcd.mce, sizeof(*m)); 131 memcpy(m, &rcd.mce, sizeof(*m));
124 *record_id = rcd.hdr.record_id; 132 rc = sizeof(*m);
133out:
134 erst_get_record_id_end();
125 135
126 return sizeof(*m); 136 return rc;
127} 137}
128 138
129/* Check whether there is record in ERST */ 139/* Check whether there is record in ERST */
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index fca34ccfd294..9ecf6feae830 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -21,6 +21,13 @@ config ACPI_APEI_GHES
21 by firmware to produce more valuable hardware error 21 by firmware to produce more valuable hardware error
22 information for Linux. 22 information for Linux.
23 23
24config ACPI_APEI_PCIEAER
25 bool "APEI PCIe AER logging/recovering support"
26 depends on ACPI_APEI && PCIEAER
27 help
28 PCIe AER errors may be reported via APEI firmware first mode.
29 Turn on this option to enable the corresponding support.
30
24config ACPI_APEI_EINJ 31config ACPI_APEI_EINJ
25 tristate "APEI Error INJection (EINJ)" 32 tristate "APEI Error INJection (EINJ)"
26 depends on ACPI_APEI && DEBUG_FS 33 depends on ACPI_APEI && DEBUG_FS
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index 31464a006d76..5d4189464d63 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -29,6 +29,7 @@
29#include <linux/time.h> 29#include <linux/time.h>
30#include <linux/cper.h> 30#include <linux/cper.h>
31#include <linux/acpi.h> 31#include <linux/acpi.h>
32#include <linux/aer.h>
32 33
33/* 34/*
34 * CPER record ID need to be unique even after reboot, because record 35 * CPER record ID need to be unique even after reboot, because record
@@ -70,8 +71,8 @@ static const char *cper_severity_str(unsigned int severity)
70 * If the output length is longer than 80, multiple line will be 71 * If the output length is longer than 80, multiple line will be
71 * printed, with @pfx is printed at the beginning of each line. 72 * printed, with @pfx is printed at the beginning of each line.
72 */ 73 */
73static void cper_print_bits(const char *pfx, unsigned int bits, 74void cper_print_bits(const char *pfx, unsigned int bits,
74 const char *strs[], unsigned int strs_size) 75 const char *strs[], unsigned int strs_size)
75{ 76{
76 int i, len = 0; 77 int i, len = 0;
77 const char *str; 78 const char *str;
@@ -81,6 +82,8 @@ static void cper_print_bits(const char *pfx, unsigned int bits,
81 if (!(bits & (1U << i))) 82 if (!(bits & (1U << i)))
82 continue; 83 continue;
83 str = strs[i]; 84 str = strs[i];
85 if (!str)
86 continue;
84 if (len && len + strlen(str) + 2 > 80) { 87 if (len && len + strlen(str) + 2 > 80) {
85 printk("%s\n", buf); 88 printk("%s\n", buf);
86 len = 0; 89 len = 0;
@@ -243,7 +246,8 @@ static const char *cper_pcie_port_type_strs[] = {
243 "root complex event collector", 246 "root complex event collector",
244}; 247};
245 248
246static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie) 249static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
250 const struct acpi_hest_generic_data *gdata)
247{ 251{
248 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) 252 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
249 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, 253 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
@@ -276,6 +280,12 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie)
276 printk( 280 printk(
277 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", 281 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
278 pfx, pcie->bridge.secondary_status, pcie->bridge.control); 282 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
283#ifdef CONFIG_ACPI_APEI_PCIEAER
284 if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
285 struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
286 cper_print_aer(pfx, gdata->error_severity, aer_regs);
287 }
288#endif
279} 289}
280 290
281static const char *apei_estatus_section_flag_strs[] = { 291static const char *apei_estatus_section_flag_strs[] = {
@@ -322,7 +332,7 @@ static void apei_estatus_print_section(
322 struct cper_sec_pcie *pcie = (void *)(gdata + 1); 332 struct cper_sec_pcie *pcie = (void *)(gdata + 1);
323 printk("%s""section_type: PCIe error\n", pfx); 333 printk("%s""section_type: PCIe error\n", pfx);
324 if (gdata->error_data_length >= sizeof(*pcie)) 334 if (gdata->error_data_length >= sizeof(*pcie))
325 cper_print_pcie(pfx, pcie); 335 cper_print_pcie(pfx, pcie, gdata);
326 else 336 else
327 goto err_section_too_small; 337 goto err_section_too_small;
328 } else 338 } else
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c
index de73caf3cebc..a4cfb64c86a1 100644
--- a/drivers/acpi/apei/erst-dbg.c
+++ b/drivers/acpi/apei/erst-dbg.c
@@ -43,12 +43,27 @@ static DEFINE_MUTEX(erst_dbg_mutex);
43 43
44static int erst_dbg_open(struct inode *inode, struct file *file) 44static int erst_dbg_open(struct inode *inode, struct file *file)
45{ 45{
46 int rc, *pos;
47
46 if (erst_disable) 48 if (erst_disable)
47 return -ENODEV; 49 return -ENODEV;
48 50
51 pos = (int *)&file->private_data;
52
53 rc = erst_get_record_id_begin(pos);
54 if (rc)
55 return rc;
56
49 return nonseekable_open(inode, file); 57 return nonseekable_open(inode, file);
50} 58}
51 59
60static int erst_dbg_release(struct inode *inode, struct file *file)
61{
62 erst_get_record_id_end();
63
64 return 0;
65}
66
52static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg) 67static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
53{ 68{
54 int rc; 69 int rc;
@@ -79,18 +94,20 @@ static long erst_dbg_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
79static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf, 94static ssize_t erst_dbg_read(struct file *filp, char __user *ubuf,
80 size_t usize, loff_t *off) 95 size_t usize, loff_t *off)
81{ 96{
82 int rc; 97 int rc, *pos;
83 ssize_t len = 0; 98 ssize_t len = 0;
84 u64 id; 99 u64 id;
85 100
86 if (*off != 0) 101 if (*off)
87 return -EINVAL; 102 return -EINVAL;
88 103
89 if (mutex_lock_interruptible(&erst_dbg_mutex) != 0) 104 if (mutex_lock_interruptible(&erst_dbg_mutex) != 0)
90 return -EINTR; 105 return -EINTR;
91 106
107 pos = (int *)&filp->private_data;
108
92retry_next: 109retry_next:
93 rc = erst_get_next_record_id(&id); 110 rc = erst_get_record_id_next(pos, &id);
94 if (rc) 111 if (rc)
95 goto out; 112 goto out;
96 /* no more record */ 113 /* no more record */
@@ -181,6 +198,7 @@ out:
181static const struct file_operations erst_dbg_ops = { 198static const struct file_operations erst_dbg_ops = {
182 .owner = THIS_MODULE, 199 .owner = THIS_MODULE,
183 .open = erst_dbg_open, 200 .open = erst_dbg_open,
201 .release = erst_dbg_release,
184 .read = erst_dbg_read, 202 .read = erst_dbg_read,
185 .write = erst_dbg_write, 203 .write = erst_dbg_write,
186 .unlocked_ioctl = erst_dbg_ioctl, 204 .unlocked_ioctl = erst_dbg_ioctl,
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index cf6db6b7662a..8ff8c32fef58 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -429,6 +429,22 @@ ssize_t erst_get_record_count(void)
429} 429}
430EXPORT_SYMBOL_GPL(erst_get_record_count); 430EXPORT_SYMBOL_GPL(erst_get_record_count);
431 431
432#define ERST_RECORD_ID_CACHE_SIZE_MIN 16
433#define ERST_RECORD_ID_CACHE_SIZE_MAX 1024
434
435struct erst_record_id_cache {
436 struct mutex lock;
437 u64 *entries;
438 int len;
439 int size;
440 int refcount;
441};
442
443static struct erst_record_id_cache erst_record_id_cache = {
444 .lock = __MUTEX_INITIALIZER(erst_record_id_cache.lock),
445 .refcount = 0,
446};
447
432static int __erst_get_next_record_id(u64 *record_id) 448static int __erst_get_next_record_id(u64 *record_id)
433{ 449{
434 struct apei_exec_context ctx; 450 struct apei_exec_context ctx;
@@ -443,26 +459,179 @@ static int __erst_get_next_record_id(u64 *record_id)
443 return 0; 459 return 0;
444} 460}
445 461
462int erst_get_record_id_begin(int *pos)
463{
464 int rc;
465
466 if (erst_disable)
467 return -ENODEV;
468
469 rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
470 if (rc)
471 return rc;
472 erst_record_id_cache.refcount++;
473 mutex_unlock(&erst_record_id_cache.lock);
474
475 *pos = 0;
476
477 return 0;
478}
479EXPORT_SYMBOL_GPL(erst_get_record_id_begin);
480
481/* erst_record_id_cache.lock must be held by caller */
482static int __erst_record_id_cache_add_one(void)
483{
484 u64 id, prev_id, first_id;
485 int i, rc;
486 u64 *entries;
487 unsigned long flags;
488
489 id = prev_id = first_id = APEI_ERST_INVALID_RECORD_ID;
490retry:
491 raw_spin_lock_irqsave(&erst_lock, flags);
492 rc = __erst_get_next_record_id(&id);
493 raw_spin_unlock_irqrestore(&erst_lock, flags);
494 if (rc == -ENOENT)
495 return 0;
496 if (rc)
497 return rc;
498 if (id == APEI_ERST_INVALID_RECORD_ID)
499 return 0;
500 /* can not skip current ID, or loop back to first ID */
501 if (id == prev_id || id == first_id)
502 return 0;
503 if (first_id == APEI_ERST_INVALID_RECORD_ID)
504 first_id = id;
505 prev_id = id;
506
507 entries = erst_record_id_cache.entries;
508 for (i = 0; i < erst_record_id_cache.len; i++) {
509 if (entries[i] == id)
510 break;
511 }
512 /* record id already in cache, try next */
513 if (i < erst_record_id_cache.len)
514 goto retry;
515 if (erst_record_id_cache.len >= erst_record_id_cache.size) {
516 int new_size, alloc_size;
517 u64 *new_entries;
518
519 new_size = erst_record_id_cache.size * 2;
520 new_size = clamp_val(new_size, ERST_RECORD_ID_CACHE_SIZE_MIN,
521 ERST_RECORD_ID_CACHE_SIZE_MAX);
522 if (new_size <= erst_record_id_cache.size) {
523 if (printk_ratelimit())
524 pr_warning(FW_WARN ERST_PFX
525 "too many record ID!\n");
526 return 0;
527 }
528 alloc_size = new_size * sizeof(entries[0]);
529 if (alloc_size < PAGE_SIZE)
530 new_entries = kmalloc(alloc_size, GFP_KERNEL);
531 else
532 new_entries = vmalloc(alloc_size);
533 if (!new_entries)
534 return -ENOMEM;
535 memcpy(new_entries, entries,
536 erst_record_id_cache.len * sizeof(entries[0]));
537 if (erst_record_id_cache.size < PAGE_SIZE)
538 kfree(entries);
539 else
540 vfree(entries);
541 erst_record_id_cache.entries = entries = new_entries;
542 erst_record_id_cache.size = new_size;
543 }
544 entries[i] = id;
545 erst_record_id_cache.len++;
546
547 return 1;
548}
549
446/* 550/*
447 * Get the record ID of an existing error record on the persistent 551 * Get the record ID of an existing error record on the persistent
448 * storage. If there is no error record on the persistent storage, the 552 * storage. If there is no error record on the persistent storage, the
449 * returned record_id is APEI_ERST_INVALID_RECORD_ID. 553 * returned record_id is APEI_ERST_INVALID_RECORD_ID.
450 */ 554 */
451int erst_get_next_record_id(u64 *record_id) 555int erst_get_record_id_next(int *pos, u64 *record_id)
452{ 556{
453 int rc; 557 int rc = 0;
454 unsigned long flags; 558 u64 *entries;
455 559
456 if (erst_disable) 560 if (erst_disable)
457 return -ENODEV; 561 return -ENODEV;
458 562
459 raw_spin_lock_irqsave(&erst_lock, flags); 563 /* must be enclosed by erst_get_record_id_begin/end */
460 rc = __erst_get_next_record_id(record_id); 564 BUG_ON(!erst_record_id_cache.refcount);
461 raw_spin_unlock_irqrestore(&erst_lock, flags); 565 BUG_ON(*pos < 0 || *pos > erst_record_id_cache.len);
566
567 mutex_lock(&erst_record_id_cache.lock);
568 entries = erst_record_id_cache.entries;
569 for (; *pos < erst_record_id_cache.len; (*pos)++)
570 if (entries[*pos] != APEI_ERST_INVALID_RECORD_ID)
571 break;
572 /* found next record id in cache */
573 if (*pos < erst_record_id_cache.len) {
574 *record_id = entries[*pos];
575 (*pos)++;
576 goto out_unlock;
577 }
578
579 /* Try to add one more record ID to cache */
580 rc = __erst_record_id_cache_add_one();
581 if (rc < 0)
582 goto out_unlock;
583 /* successfully add one new ID */
584 if (rc == 1) {
585 *record_id = erst_record_id_cache.entries[*pos];
586 (*pos)++;
587 rc = 0;
588 } else {
589 *pos = -1;
590 *record_id = APEI_ERST_INVALID_RECORD_ID;
591 }
592out_unlock:
593 mutex_unlock(&erst_record_id_cache.lock);
462 594
463 return rc; 595 return rc;
464} 596}
465EXPORT_SYMBOL_GPL(erst_get_next_record_id); 597EXPORT_SYMBOL_GPL(erst_get_record_id_next);
598
599/* erst_record_id_cache.lock must be held by caller */
600static void __erst_record_id_cache_compact(void)
601{
602 int i, wpos = 0;
603 u64 *entries;
604
605 if (erst_record_id_cache.refcount)
606 return;
607
608 entries = erst_record_id_cache.entries;
609 for (i = 0; i < erst_record_id_cache.len; i++) {
610 if (entries[i] == APEI_ERST_INVALID_RECORD_ID)
611 continue;
612 if (wpos != i)
613 memcpy(&entries[wpos], &entries[i], sizeof(entries[i]));
614 wpos++;
615 }
616 erst_record_id_cache.len = wpos;
617}
618
619void erst_get_record_id_end(void)
620{
621 /*
622 * erst_disable != 0 should be detected by invoker via the
623 * return value of erst_get_record_id_begin/next, so this
624 * function should not be called for erst_disable != 0.
625 */
626 BUG_ON(erst_disable);
627
628 mutex_lock(&erst_record_id_cache.lock);
629 erst_record_id_cache.refcount--;
630 BUG_ON(erst_record_id_cache.refcount < 0);
631 __erst_record_id_cache_compact();
632 mutex_unlock(&erst_record_id_cache.lock);
633}
634EXPORT_SYMBOL_GPL(erst_get_record_id_end);
466 635
467static int __erst_write_to_storage(u64 offset) 636static int __erst_write_to_storage(u64 offset)
468{ 637{
@@ -703,56 +872,34 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record,
703} 872}
704EXPORT_SYMBOL_GPL(erst_read); 873EXPORT_SYMBOL_GPL(erst_read);
705 874
706/*
707 * If return value > buflen, the buffer size is not big enough,
708 * else if return value = 0, there is no more record to read,
709 * else if return value < 0, something goes wrong,
710 * else everything is OK, and return value is record length
711 */
712ssize_t erst_read_next(struct cper_record_header *record, size_t buflen)
713{
714 int rc;
715 ssize_t len;
716 unsigned long flags;
717 u64 record_id;
718
719 if (erst_disable)
720 return -ENODEV;
721
722 raw_spin_lock_irqsave(&erst_lock, flags);
723 rc = __erst_get_next_record_id(&record_id);
724 if (rc) {
725 raw_spin_unlock_irqrestore(&erst_lock, flags);
726 return rc;
727 }
728 /* no more record */
729 if (record_id == APEI_ERST_INVALID_RECORD_ID) {
730 raw_spin_unlock_irqrestore(&erst_lock, flags);
731 return 0;
732 }
733
734 len = __erst_read(record_id, record, buflen);
735 raw_spin_unlock_irqrestore(&erst_lock, flags);
736
737 return len;
738}
739EXPORT_SYMBOL_GPL(erst_read_next);
740
741int erst_clear(u64 record_id) 875int erst_clear(u64 record_id)
742{ 876{
743 int rc; 877 int rc, i;
744 unsigned long flags; 878 unsigned long flags;
879 u64 *entries;
745 880
746 if (erst_disable) 881 if (erst_disable)
747 return -ENODEV; 882 return -ENODEV;
748 883
884 rc = mutex_lock_interruptible(&erst_record_id_cache.lock);
885 if (rc)
886 return rc;
749 raw_spin_lock_irqsave(&erst_lock, flags); 887 raw_spin_lock_irqsave(&erst_lock, flags);
750 if (erst_erange.attr & ERST_RANGE_NVRAM) 888 if (erst_erange.attr & ERST_RANGE_NVRAM)
751 rc = __erst_clear_from_nvram(record_id); 889 rc = __erst_clear_from_nvram(record_id);
752 else 890 else
753 rc = __erst_clear_from_storage(record_id); 891 rc = __erst_clear_from_storage(record_id);
754 raw_spin_unlock_irqrestore(&erst_lock, flags); 892 raw_spin_unlock_irqrestore(&erst_lock, flags);
755 893 if (rc)
894 goto out;
895 entries = erst_record_id_cache.entries;
896 for (i = 0; i < erst_record_id_cache.len; i++) {
897 if (entries[i] == record_id)
898 entries[i] = APEI_ERST_INVALID_RECORD_ID;
899 }
900 __erst_record_id_cache_compact();
901out:
902 mutex_unlock(&erst_record_id_cache.lock);
756 return rc; 903 return rc;
757} 904}
758EXPORT_SYMBOL_GPL(erst_clear); 905EXPORT_SYMBOL_GPL(erst_clear);
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index 80c11d131499..3eb77080366a 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -35,13 +35,6 @@
35 PCI_ERR_UNC_UNX_COMP| \ 35 PCI_ERR_UNC_UNX_COMP| \
36 PCI_ERR_UNC_MALF_TLP) 36 PCI_ERR_UNC_MALF_TLP)
37 37
38struct header_log_regs {
39 unsigned int dw0;
40 unsigned int dw1;
41 unsigned int dw2;
42 unsigned int dw3;
43};
44
45#define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */ 38#define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */
46struct aer_err_info { 39struct aer_err_info {
47 struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; 40 struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
@@ -59,7 +52,7 @@ struct aer_err_info {
59 52
60 unsigned int status; /* COR/UNCOR Error Status */ 53 unsigned int status; /* COR/UNCOR Error Status */
61 unsigned int mask; /* COR/UNCOR Error Mask */ 54 unsigned int mask; /* COR/UNCOR Error Mask */
62 struct header_log_regs tlp; /* TLP Header */ 55 struct aer_header_log_regs tlp; /* TLP Header */
63}; 56};
64 57
65struct aer_err_source { 58struct aer_err_source {
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 9d3e4c8d0184..b07a42e0b350 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -19,6 +19,7 @@
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/pm.h> 20#include <linux/pm.h>
21#include <linux/suspend.h> 21#include <linux/suspend.h>
22#include <linux/cper.h>
22 23
23#include "aerdrv.h" 24#include "aerdrv.h"
24 25
@@ -57,86 +58,44 @@
57 (e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \ 58 (e & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
58 AER_TRANSACTION_LAYER_ERROR) 59 AER_TRANSACTION_LAYER_ERROR)
59 60
60#define AER_PR(info, pdev, fmt, args...) \
61 printk("%s%s %s: " fmt, (info->severity == AER_CORRECTABLE) ? \
62 KERN_WARNING : KERN_ERR, dev_driver_string(&pdev->dev), \
63 dev_name(&pdev->dev), ## args)
64
65/* 61/*
66 * AER error strings 62 * AER error strings
67 */ 63 */
68static char *aer_error_severity_string[] = { 64static const char *aer_error_severity_string[] = {
69 "Uncorrected (Non-Fatal)", 65 "Uncorrected (Non-Fatal)",
70 "Uncorrected (Fatal)", 66 "Uncorrected (Fatal)",
71 "Corrected" 67 "Corrected"
72}; 68};
73 69
74static char *aer_error_layer[] = { 70static const char *aer_error_layer[] = {
75 "Physical Layer", 71 "Physical Layer",
76 "Data Link Layer", 72 "Data Link Layer",
77 "Transaction Layer" 73 "Transaction Layer"
78}; 74};
79static char *aer_correctable_error_string[] = { 75
80 "Receiver Error ", /* Bit Position 0 */ 76static const char *aer_correctable_error_string[] = {
81 NULL, 77 "Receiver Error", /* Bit Position 0 */
82 NULL,
83 NULL,
84 NULL,
85 NULL,
86 "Bad TLP ", /* Bit Position 6 */
87 "Bad DLLP ", /* Bit Position 7 */
88 "RELAY_NUM Rollover ", /* Bit Position 8 */
89 NULL,
90 NULL,
91 NULL,
92 "Replay Timer Timeout ", /* Bit Position 12 */
93 "Advisory Non-Fatal ", /* Bit Position 13 */
94 NULL,
95 NULL,
96 NULL,
97 NULL,
98 NULL,
99 NULL,
100 NULL,
101 NULL,
102 NULL,
103 NULL,
104 NULL, 78 NULL,
105 NULL, 79 NULL,
106 NULL, 80 NULL,
107 NULL, 81 NULL,
108 NULL, 82 NULL,
83 "Bad TLP", /* Bit Position 6 */
84 "Bad DLLP", /* Bit Position 7 */
85 "RELAY_NUM Rollover", /* Bit Position 8 */
109 NULL, 86 NULL,
110 NULL, 87 NULL,
111 NULL, 88 NULL,
89 "Replay Timer Timeout", /* Bit Position 12 */
90 "Advisory Non-Fatal", /* Bit Position 13 */
112}; 91};
113 92
114static char *aer_uncorrectable_error_string[] = { 93static const char *aer_uncorrectable_error_string[] = {
115 NULL,
116 NULL,
117 NULL,
118 NULL,
119 "Data Link Protocol ", /* Bit Position 4 */
120 NULL,
121 NULL,
122 NULL,
123 NULL,
124 NULL,
125 NULL,
126 NULL,
127 "Poisoned TLP ", /* Bit Position 12 */
128 "Flow Control Protocol ", /* Bit Position 13 */
129 "Completion Timeout ", /* Bit Position 14 */
130 "Completer Abort ", /* Bit Position 15 */
131 "Unexpected Completion ", /* Bit Position 16 */
132 "Receiver Overflow ", /* Bit Position 17 */
133 "Malformed TLP ", /* Bit Position 18 */
134 "ECRC ", /* Bit Position 19 */
135 "Unsupported Request ", /* Bit Position 20 */
136 NULL, 94 NULL,
137 NULL, 95 NULL,
138 NULL, 96 NULL,
139 NULL, 97 NULL,
98 "Data Link Protocol", /* Bit Position 4 */
140 NULL, 99 NULL,
141 NULL, 100 NULL,
142 NULL, 101 NULL,
@@ -144,19 +103,29 @@ static char *aer_uncorrectable_error_string[] = {
144 NULL, 103 NULL,
145 NULL, 104 NULL,
146 NULL, 105 NULL,
106 "Poisoned TLP", /* Bit Position 12 */
107 "Flow Control Protocol", /* Bit Position 13 */
108 "Completion Timeout", /* Bit Position 14 */
109 "Completer Abort", /* Bit Position 15 */
110 "Unexpected Completion", /* Bit Position 16 */
111 "Receiver Overflow", /* Bit Position 17 */
112 "Malformed TLP", /* Bit Position 18 */
113 "ECRC", /* Bit Position 19 */
114 "Unsupported Request", /* Bit Position 20 */
147}; 115};
148 116
149static char *aer_agent_string[] = { 117static const char *aer_agent_string[] = {
150 "Receiver ID", 118 "Receiver ID",
151 "Requester ID", 119 "Requester ID",
152 "Completer ID", 120 "Completer ID",
153 "Transmitter ID" 121 "Transmitter ID"
154}; 122};
155 123
156static void __aer_print_error(struct aer_err_info *info, struct pci_dev *dev) 124static void __aer_print_error(const char *prefix,
125 struct aer_err_info *info)
157{ 126{
158 int i, status; 127 int i, status;
159 char *errmsg = NULL; 128 const char *errmsg = NULL;
160 129
161 status = (info->status & ~info->mask); 130 status = (info->status & ~info->mask);
162 131
@@ -165,15 +134,17 @@ static void __aer_print_error(struct aer_err_info *info, struct pci_dev *dev)
165 continue; 134 continue;
166 135
167 if (info->severity == AER_CORRECTABLE) 136 if (info->severity == AER_CORRECTABLE)
168 errmsg = aer_correctable_error_string[i]; 137 errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
138 aer_correctable_error_string[i] : NULL;
169 else 139 else
170 errmsg = aer_uncorrectable_error_string[i]; 140 errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
141 aer_uncorrectable_error_string[i] : NULL;
171 142
172 if (errmsg) 143 if (errmsg)
173 AER_PR(info, dev, " [%2d] %s%s\n", i, errmsg, 144 printk("%s"" [%2d] %-22s%s\n", prefix, i, errmsg,
174 info->first_error == i ? " (First)" : ""); 145 info->first_error == i ? " (First)" : "");
175 else 146 else
176 AER_PR(info, dev, " [%2d] Unknown Error Bit%s\n", i, 147 printk("%s"" [%2d] Unknown Error Bit%s\n", prefix, i,
177 info->first_error == i ? " (First)" : ""); 148 info->first_error == i ? " (First)" : "");
178 } 149 }
179} 150}
@@ -181,11 +152,15 @@ static void __aer_print_error(struct aer_err_info *info, struct pci_dev *dev)
181void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) 152void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
182{ 153{
183 int id = ((dev->bus->number << 8) | dev->devfn); 154 int id = ((dev->bus->number << 8) | dev->devfn);
155 char prefix[44];
156
157 snprintf(prefix, sizeof(prefix), "%s%s %s: ",
158 (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR,
159 dev_driver_string(&dev->dev), dev_name(&dev->dev));
184 160
185 if (info->status == 0) { 161 if (info->status == 0) {
186 AER_PR(info, dev, 162 printk("%s""PCIe Bus Error: severity=%s, type=Unaccessible, "
187 "PCIe Bus Error: severity=%s, type=Unaccessible, " 163 "id=%04x(Unregistered Agent ID)\n", prefix,
188 "id=%04x(Unregistered Agent ID)\n",
189 aer_error_severity_string[info->severity], id); 164 aer_error_severity_string[info->severity], id);
190 } else { 165 } else {
191 int layer, agent; 166 int layer, agent;
@@ -193,23 +168,22 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
193 layer = AER_GET_LAYER_ERROR(info->severity, info->status); 168 layer = AER_GET_LAYER_ERROR(info->severity, info->status);
194 agent = AER_GET_AGENT(info->severity, info->status); 169 agent = AER_GET_AGENT(info->severity, info->status);
195 170
196 AER_PR(info, dev, 171 printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n",
197 "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n", 172 prefix, aer_error_severity_string[info->severity],
198 aer_error_severity_string[info->severity],
199 aer_error_layer[layer], id, aer_agent_string[agent]); 173 aer_error_layer[layer], id, aer_agent_string[agent]);
200 174
201 AER_PR(info, dev, 175 printk("%s"" device [%04x:%04x] error status/mask=%08x/%08x\n",
202 " device [%04x:%04x] error status/mask=%08x/%08x\n", 176 prefix, dev->vendor, dev->device,
203 dev->vendor, dev->device, info->status, info->mask); 177 info->status, info->mask);
204 178
205 __aer_print_error(info, dev); 179 __aer_print_error(prefix, info);
206 180
207 if (info->tlp_header_valid) { 181 if (info->tlp_header_valid) {
208 unsigned char *tlp = (unsigned char *) &info->tlp; 182 unsigned char *tlp = (unsigned char *) &info->tlp;
209 AER_PR(info, dev, " TLP Header:" 183 printk("%s"" TLP Header:"
210 " %02x%02x%02x%02x %02x%02x%02x%02x" 184 " %02x%02x%02x%02x %02x%02x%02x%02x"
211 " %02x%02x%02x%02x %02x%02x%02x%02x\n", 185 " %02x%02x%02x%02x %02x%02x%02x%02x\n",
212 *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, 186 prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
213 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), 187 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
214 *(tlp + 11), *(tlp + 10), *(tlp + 9), 188 *(tlp + 11), *(tlp + 10), *(tlp + 9),
215 *(tlp + 8), *(tlp + 15), *(tlp + 14), 189 *(tlp + 8), *(tlp + 15), *(tlp + 14),
@@ -218,8 +192,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
218 } 192 }
219 193
220 if (info->id && info->error_dev_num > 1 && info->id == id) 194 if (info->id && info->error_dev_num > 1 && info->id == id)
221 AER_PR(info, dev, 195 printk("%s"" Error of this Agent(%04x) is reported first\n",
222 " Error of this Agent(%04x) is reported first\n", id); 196 prefix, id);
223} 197}
224 198
225void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) 199void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
@@ -228,3 +202,61 @@ void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
228 info->multi_error_valid ? "Multiple " : "", 202 info->multi_error_valid ? "Multiple " : "",
229 aer_error_severity_string[info->severity], info->id); 203 aer_error_severity_string[info->severity], info->id);
230} 204}
205
206#ifdef CONFIG_ACPI_APEI_PCIEAER
207static int cper_severity_to_aer(int cper_severity)
208{
209 switch (cper_severity) {
210 case CPER_SEV_RECOVERABLE:
211 return AER_NONFATAL;
212 case CPER_SEV_FATAL:
213 return AER_FATAL;
214 default:
215 return AER_CORRECTABLE;
216 }
217}
218
219void cper_print_aer(const char *prefix, int cper_severity,
220 struct aer_capability_regs *aer)
221{
222 int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
223 u32 status, mask;
224 const char **status_strs;
225
226 aer_severity = cper_severity_to_aer(cper_severity);
227 if (aer_severity == AER_CORRECTABLE) {
228 status = aer->cor_status;
229 mask = aer->cor_mask;
230 status_strs = aer_correctable_error_string;
231 status_strs_size = ARRAY_SIZE(aer_correctable_error_string);
232 } else {
233 status = aer->uncor_status;
234 mask = aer->uncor_mask;
235 status_strs = aer_uncorrectable_error_string;
236 status_strs_size = ARRAY_SIZE(aer_uncorrectable_error_string);
237 tlp_header_valid = status & AER_LOG_TLP_MASKS;
238 }
239 layer = AER_GET_LAYER_ERROR(aer_severity, status);
240 agent = AER_GET_AGENT(aer_severity, status);
241 printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n",
242 prefix, status, mask);
243 cper_print_bits(prefix, status, status_strs, status_strs_size);
244 printk("%s""aer_layer=%s, aer_agent=%s\n", prefix,
245 aer_error_layer[layer], aer_agent_string[agent]);
246 if (aer_severity != AER_CORRECTABLE)
247 printk("%s""aer_uncor_severity: 0x%08x\n",
248 prefix, aer->uncor_severity);
249 if (tlp_header_valid) {
250 const unsigned char *tlp;
251 tlp = (const unsigned char *)&aer->header_log;
252 printk("%s""aer_tlp_header:"
253 " %02x%02x%02x%02x %02x%02x%02x%02x"
254 " %02x%02x%02x%02x %02x%02x%02x%02x\n",
255 prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
256 *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
257 *(tlp + 11), *(tlp + 10), *(tlp + 9),
258 *(tlp + 8), *(tlp + 15), *(tlp + 14),
259 *(tlp + 13), *(tlp + 12));
260 }
261}
262#endif
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index c4dbb132d902..e67b523a50e1 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -30,10 +30,11 @@ int apei_hest_parse(apei_hest_func_t func, void *data);
30 30
31int erst_write(const struct cper_record_header *record); 31int erst_write(const struct cper_record_header *record);
32ssize_t erst_get_record_count(void); 32ssize_t erst_get_record_count(void);
33int erst_get_next_record_id(u64 *record_id); 33int erst_get_record_id_begin(int *pos);
34int erst_get_record_id_next(int *pos, u64 *record_id);
35void erst_get_record_id_end(void);
34ssize_t erst_read(u64 record_id, struct cper_record_header *record, 36ssize_t erst_read(u64 record_id, struct cper_record_header *record,
35 size_t buflen); 37 size_t buflen);
36ssize_t erst_read_next(struct cper_record_header *record, size_t buflen);
37int erst_clear(u64 record_id); 38int erst_clear(u64 record_id);
38 39
39#endif 40#endif
diff --git a/include/linux/aer.h b/include/linux/aer.h
index f7df1eefc107..8414de22a779 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -7,6 +7,28 @@
7#ifndef _AER_H_ 7#ifndef _AER_H_
8#define _AER_H_ 8#define _AER_H_
9 9
10struct aer_header_log_regs {
11 unsigned int dw0;
12 unsigned int dw1;
13 unsigned int dw2;
14 unsigned int dw3;
15};
16
17struct aer_capability_regs {
18 u32 header;
19 u32 uncor_status;
20 u32 uncor_mask;
21 u32 uncor_severity;
22 u32 cor_status;
23 u32 cor_mask;
24 u32 cap_control;
25 struct aer_header_log_regs header_log;
26 u32 root_command;
27 u32 root_status;
28 u16 cor_err_source;
29 u16 uncor_err_source;
30};
31
10#if defined(CONFIG_PCIEAER) 32#if defined(CONFIG_PCIEAER)
11/* pci-e port driver needs this function to enable aer */ 33/* pci-e port driver needs this function to enable aer */
12extern int pci_enable_pcie_error_reporting(struct pci_dev *dev); 34extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
@@ -27,5 +49,7 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
27} 49}
28#endif 50#endif
29 51
52extern void cper_print_aer(const char *prefix, int cper_severity,
53 struct aer_capability_regs *aer);
30#endif //_AER_H_ 54#endif //_AER_H_
31 55
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 3104aaff5dd0..372a25839fd1 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -388,5 +388,7 @@ struct cper_sec_pcie {
388#pragma pack() 388#pragma pack()
389 389
390u64 cper_next_record_id(void); 390u64 cper_next_record_id(void);
391void cper_print_bits(const char *prefix, unsigned int bits,
392 const char *strs[], unsigned int strs_size);
391 393
392#endif 394#endif