aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>2009-09-07 04:16:59 -0400
committerJesse Barnes <jbarnes@virtuousgeek.org>2009-09-09 16:50:13 -0400
commitb1c089b7caf18905bd1d87136cf7b8c837254932 (patch)
tree52859a8cdd9479c31d8c013f8cb2145cfb5a1a8b
parent79e4b89be81b5e53bc4cb51788ca7a45cadb4ef3 (diff)
PCI: pcie, aer: report all errors before recovery
This patch is required so that error records are not lost to actions invoked during error recovery, such as slot reset. The following sample (real machine + dummy record injected by aer-inject) shows that the record of 28:00.1 could not be retrieved because of the recovery of 28:00.0: - Before: pcieport-driver 0000:00:02.0: AER: Multiple Uncorrected (Non-Fatal) error received: id=2801 e1000e 0000:28:00.0: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2800(Receiver ID) e1000e 0000:28:00.0: device [8086:1096] error status/mask=00001000/00100000 e1000e 0000:28:00.0: [12] Poisoned TLP (First) e1000e 0000:28:00.0: TLP Header: 00000000 00000001 00000002 00000003 e1000e 0000:28:00.0: broadcast error_detected message e1000e 0000:28:00.0: broadcast slot_reset message e1000e 0000:28:00.0: setting latency timer to 64 e1000e 0000:28:00.0: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.1: setting latency timer to 64 e1000e 0000:28:00.1: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.0: broadcast resume message e1000e 0000:28:00.0: AER driver successfully recovered e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX - After: pcieport-driver 0000:00:02.0: AER: Multiple Uncorrected (Non-Fatal) error received: id=2801 e1000e 0000:28:00.0: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2800(Receiver ID) e1000e 0000:28:00.0: device [8086:1096] error status/mask=00001000/00100000 e1000e 0000:28:00.0: [12] Poisoned TLP (First) e1000e 0000:28:00.0: TLP Header: 00000000 00000001 00000002 00000003 e1000e 0000:28:00.1: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2801(Receiver ID) e1000e 0000:28:00.1: device [8086:1096] error status/mask=00081000/00100000 e1000e 0000:28:00.1: [12] Poisoned 
TLP (First) e1000e 0000:28:00.1: [19] ECRC e1000e 0000:28:00.1: TLP Header: 00000000 00000001 00000002 00000003 e1000e 0000:28:00.1: Error of this Agent(2801) is reported first e1000e 0000:28:00.0: broadcast error_detected message e1000e 0000:28:00.0: broadcast slot_reset message e1000e 0000:28:00.0: setting latency timer to 64 e1000e 0000:28:00.0: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.0: PME# disabled e1000e 0000:28:00.1: setting latency timer to 64 e1000e 0000:28:00.1: restoring config space at offset 0x1 (was 0x100547, writing 0x100147) e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.1: PME# disabled e1000e 0000:28:00.0: broadcast resume message e1000e 0000:28:00.0: AER driver successfully recovered e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
-rw-r--r--drivers/pci/pcie/aer/aerdrv.h2
-rw-r--r--drivers/pci/pcie/aer/aerdrv_core.c29
2 files changed, 18 insertions, 13 deletions
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index f9979eb56fb2..bd833ea3ba49 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -29,8 +29,6 @@
29#define ERR_COR_ID(d) (d & 0xffff) 29#define ERR_COR_ID(d) (d & 0xffff)
30#define ERR_UNCOR_ID(d) (d >> 16) 30#define ERR_UNCOR_ID(d) (d >> 16)
31 31
32#define AER_SUCCESS 0
33#define AER_UNSUCCESS 1
34#define AER_ERROR_SOURCES_MAX 100 32#define AER_ERROR_SOURCES_MAX 100
35 33
36#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ 34#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index d9185cd2d900..9f5ccbeb4fa5 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -696,6 +696,13 @@ static struct aer_err_source *get_e_source(struct aer_rpc *rpc)
696 return e_source; 696 return e_source;
697} 697}
698 698
699/**
700 * get_device_error_info - read error status from dev and store it to info
701 * @dev: pointer to the device expected to have an error record
702 * @info: pointer to structure to store the error record
703 *
704 * Return 1 on success, 0 on error.
705 */
699static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) 706static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
700{ 707{
701 int pos, temp; 708 int pos, temp;
@@ -707,7 +714,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
707 714
708 /* The device might not support AER */ 715 /* The device might not support AER */
709 if (!pos) 716 if (!pos)
710 return AER_SUCCESS; 717 return 1;
711 718
712 if (info->severity == AER_CORRECTABLE) { 719 if (info->severity == AER_CORRECTABLE) {
713 pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, 720 pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS,
@@ -715,7 +722,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
715 pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, 722 pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK,
716 &info->mask); 723 &info->mask);
717 if (!(info->status & ~info->mask)) 724 if (!(info->status & ~info->mask))
718 return AER_UNSUCCESS; 725 return 0;
719 } else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE || 726 } else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE ||
720 info->severity == AER_NONFATAL) { 727 info->severity == AER_NONFATAL) {
721 728
@@ -725,7 +732,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
725 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, 732 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK,
726 &info->mask); 733 &info->mask);
727 if (!(info->status & ~info->mask)) 734 if (!(info->status & ~info->mask))
728 return AER_UNSUCCESS; 735 return 0;
729 736
730 /* Get First Error Pointer */ 737 /* Get First Error Pointer */
731 pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp); 738 pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp);
@@ -744,7 +751,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
744 } 751 }
745 } 752 }
746 753
747 return AER_SUCCESS; 754 return 1;
748} 755}
749 756
750static inline void aer_process_err_devices(struct pcie_device *p_device, 757static inline void aer_process_err_devices(struct pcie_device *p_device,
@@ -758,14 +765,14 @@ static inline void aer_process_err_devices(struct pcie_device *p_device,
758 e_info->id); 765 e_info->id);
759 } 766 }
760 767
768 /* Report all errors before handling them, so records are not lost by reset etc. */
761 for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { 769 for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
762 if (get_device_error_info(e_info->dev[i], e_info) == 770 if (get_device_error_info(e_info->dev[i], e_info))
763 AER_SUCCESS) {
764 aer_print_error(e_info->dev[i], e_info); 771 aer_print_error(e_info->dev[i], e_info);
765 handle_error_source(p_device, 772 }
766 e_info->dev[i], 773 for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
767 e_info); 774 if (get_device_error_info(e_info->dev[i], e_info))
768 } 775 handle_error_source(p_device, e_info->dev[i], e_info);
769 } 776 }
770} 777}
771 778
@@ -870,5 +877,5 @@ int aer_init(struct pcie_device *dev)
870 if (aer_osc_setup(dev) && !forceload) 877 if (aer_osc_setup(dev) && !forceload)
871 return -ENXIO; 878 return -ENXIO;
872 879
873 return AER_SUCCESS; 880 return 0;
874} 881}