aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHuang Ying <ying.huang@intel.com>2011-05-17 04:08:37 -0400
committerJesse Barnes <jbarnes@virtuousgeek.org>2011-07-22 11:25:37 -0400
commit0918472ceeffad234df5589e45b646a94476f835 (patch)
tree3afd05b7710a56056cdf8273545990949dd553fa
parent0aba496fc820d7c36775f2fd0ef81994e1af67a8 (diff)
PCI: PCIe AER: add aer_recover_queue
In addition to native PCIe AER, APEI (ACPI Platform Error Interface) GHES (Generic Hardware Error Source) can now be used to report PCIe AER errors as well. To support PCIe AER recovery from APEI GHES, aer_recover_queue is added to export the recovery function of the native PCIe AER driver. Recoverable PCIe AER errors are reported via NMI in APEI GHES. APEI GHES then uses irq_work to defer the error processing to an IRQ handler. But PCIe AER recovery can be very time-consuming, so aer_recover_queue, which can be called from an IRQ handler, defers the actual recovery action to process context, that is, a work queue. Signed-off-by: Huang Ying <ying.huang@intel.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
-rw-r--r--drivers/pci/pcie/aer/aerdrv_core.c76
-rw-r--r--drivers/pci/pcie/aer/aerdrv_errprint.c3
-rw-r--r--include/linux/aer.h3
3 files changed, 74 insertions, 8 deletions
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 43421fbe080a..9674e9f30d49 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -24,6 +24,7 @@
24#include <linux/suspend.h> 24#include <linux/suspend.h>
25#include <linux/delay.h> 25#include <linux/delay.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/kfifo.h>
27#include "aerdrv.h" 28#include "aerdrv.h"
28 29
29static int forceload; 30static int forceload;
@@ -445,8 +446,7 @@ static struct pcie_port_service_driver *find_aer_service(struct pci_dev *dev)
445 return drv; 446 return drv;
446} 447}
447 448
448static pci_ers_result_t reset_link(struct pcie_device *aerdev, 449static pci_ers_result_t reset_link(struct pci_dev *dev)
449 struct pci_dev *dev)
450{ 450{
451 struct pci_dev *udev; 451 struct pci_dev *udev;
452 pci_ers_result_t status; 452 pci_ers_result_t status;
@@ -486,7 +486,6 @@ static pci_ers_result_t reset_link(struct pcie_device *aerdev,
486 486
487/** 487/**
488 * do_recovery - handle nonfatal/fatal error recovery process 488 * do_recovery - handle nonfatal/fatal error recovery process
489 * @aerdev: pointer to a pcie_device data structure of root port
490 * @dev: pointer to a pci_dev data structure of agent detecting an error 489 * @dev: pointer to a pci_dev data structure of agent detecting an error
491 * @severity: error severity type 490 * @severity: error severity type
492 * 491 *
@@ -494,8 +493,7 @@ static pci_ers_result_t reset_link(struct pcie_device *aerdev,
494 * error detected message to all downstream drivers within a hierarchy in 493 * error detected message to all downstream drivers within a hierarchy in
495 * question and return the returned code. 494 * question and return the returned code.
496 */ 495 */
497static void do_recovery(struct pcie_device *aerdev, struct pci_dev *dev, 496static void do_recovery(struct pci_dev *dev, int severity)
498 int severity)
499{ 497{
500 pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED; 498 pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
501 enum pci_channel_state state; 499 enum pci_channel_state state;
@@ -511,7 +509,7 @@ static void do_recovery(struct pcie_device *aerdev, struct pci_dev *dev,
511 report_error_detected); 509 report_error_detected);
512 510
513 if (severity == AER_FATAL) { 511 if (severity == AER_FATAL) {
514 result = reset_link(aerdev, dev); 512 result = reset_link(dev);
515 if (result != PCI_ERS_RESULT_RECOVERED) 513 if (result != PCI_ERS_RESULT_RECOVERED)
516 goto failed; 514 goto failed;
517 } 515 }
@@ -576,9 +574,73 @@ static void handle_error_source(struct pcie_device *aerdev,
576 pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, 574 pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
577 info->status); 575 info->status);
578 } else 576 } else
579 do_recovery(aerdev, dev, info->severity); 577 do_recovery(dev, info->severity);
580} 578}
581 579
580#ifdef CONFIG_ACPI_APEI_PCIEAER
581static void aer_recover_work_func(struct work_struct *work);
582
583#define AER_RECOVER_RING_ORDER 4
584#define AER_RECOVER_RING_SIZE (1 << AER_RECOVER_RING_ORDER)
585
586struct aer_recover_entry
587{
588 u8 bus;
589 u8 devfn;
590 u16 domain;
591 int severity;
592};
593
594static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
595 AER_RECOVER_RING_SIZE);
596/*
597 * Mutual exclusion for writers of aer_recover_ring, reader side don't
598 * need lock, because there is only one reader and lock is not needed
599 * between reader and writer.
600 */
601static DEFINE_SPINLOCK(aer_recover_ring_lock);
602static DECLARE_WORK(aer_recover_work, aer_recover_work_func);
603
604void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
605 int severity)
606{
607 unsigned long flags;
608 struct aer_recover_entry entry = {
609 .bus = bus,
610 .devfn = devfn,
611 .domain = domain,
612 .severity = severity,
613 };
614
615 spin_lock_irqsave(&aer_recover_ring_lock, flags);
616 if (kfifo_put(&aer_recover_ring, &entry))
617 schedule_work(&aer_recover_work);
618 else
619 pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n",
620 domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
621 spin_unlock_irqrestore(&aer_recover_ring_lock, flags);
622}
623EXPORT_SYMBOL_GPL(aer_recover_queue);
624
625static void aer_recover_work_func(struct work_struct *work)
626{
627 struct aer_recover_entry entry;
628 struct pci_dev *pdev;
629
630 while (kfifo_get(&aer_recover_ring, &entry)) {
631 pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
632 entry.devfn);
633 if (!pdev) {
634 pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n",
635 entry.domain, entry.bus,
636 PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
637 continue;
638 }
639 do_recovery(pdev, entry.severity);
640 }
641}
642#endif
643
582/** 644/**
583 * get_device_error_info - read error status from dev and store it to info 645 * get_device_error_info - read error status from dev and store it to info
584 * @dev: pointer to the device expected to have a error record 646 * @dev: pointer to the device expected to have a error record
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index b07a42e0b350..3ea51736f18d 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -204,7 +204,7 @@ void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
204} 204}
205 205
206#ifdef CONFIG_ACPI_APEI_PCIEAER 206#ifdef CONFIG_ACPI_APEI_PCIEAER
207static int cper_severity_to_aer(int cper_severity) 207int cper_severity_to_aer(int cper_severity)
208{ 208{
209 switch (cper_severity) { 209 switch (cper_severity) {
210 case CPER_SEV_RECOVERABLE: 210 case CPER_SEV_RECOVERABLE:
@@ -215,6 +215,7 @@ static int cper_severity_to_aer(int cper_severity)
215 return AER_CORRECTABLE; 215 return AER_CORRECTABLE;
216 } 216 }
217} 217}
218EXPORT_SYMBOL_GPL(cper_severity_to_aer);
218 219
219void cper_print_aer(const char *prefix, int cper_severity, 220void cper_print_aer(const char *prefix, int cper_severity,
220 struct aer_capability_regs *aer) 221 struct aer_capability_regs *aer)
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 8414de22a779..544abdb2238c 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -51,5 +51,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
51 51
52extern void cper_print_aer(const char *prefix, int cper_severity, 52extern void cper_print_aer(const char *prefix, int cper_severity,
53 struct aer_capability_regs *aer); 53 struct aer_capability_regs *aer);
54extern int cper_severity_to_aer(int cper_severity);
55extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
56 int severity);
54#endif //_AER_H_ 57#endif //_AER_H_
55 58