diff options
author | Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> | 2009-09-07 04:16:59 -0400 |
---|---|---|
committer | Jesse Barnes <jbarnes@virtuousgeek.org> | 2009-09-09 16:50:13 -0400 |
commit | b1c089b7caf18905bd1d87136cf7b8c837254932 (patch) | |
tree | 52859a8cdd9479c31d8c013f8cb2145cfb5a1a8b | |
parent | 79e4b89be81b5e53bc4cb51788ca7a45cadb4ef3 (diff) |
PCI: pcie, aer: report all error before recovery
This patch is required so that error records are not lost to actions invoked
during error recovery, such as a slot reset.
The following sample (real machine + dummy record injected by aer-inject)
shows that the record of 28:00.1 was lost during the recovery of 28:00.0 and could not be retrieved:
- Before:
pcieport-driver 0000:00:02.0: AER: Multiple Uncorrected (Non-Fatal) error received: id=2801
e1000e 0000:28:00.0: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2800(Receiver ID)
e1000e 0000:28:00.0: device [8086:1096] error status/mask=00001000/00100000
e1000e 0000:28:00.0: [12] Poisoned TLP (First)
e1000e 0000:28:00.0: TLP Header: 00000000 00000001 00000002 00000003
e1000e 0000:28:00.0: broadcast error_detected message
e1000e 0000:28:00.0: broadcast slot_reset message
e1000e 0000:28:00.0: setting latency timer to 64
e1000e 0000:28:00.0: restoring config space at offset 0x1 (was 0x100547, writing 0x100147)
e1000e 0000:28:00.0: PME# disabled
e1000e 0000:28:00.0: PME# disabled
e1000e 0000:28:00.1: setting latency timer to 64
e1000e 0000:28:00.1: restoring config space at offset 0x1 (was 0x100547, writing 0x100147)
e1000e 0000:28:00.1: PME# disabled
e1000e 0000:28:00.1: PME# disabled
e1000e 0000:28:00.0: broadcast resume message
e1000e 0000:28:00.0: AER driver successfully recovered
e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX
- After:
pcieport-driver 0000:00:02.0: AER: Multiple Uncorrected (Non-Fatal) error received: id=2801
e1000e 0000:28:00.0: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2800(Receiver ID)
e1000e 0000:28:00.0: device [8086:1096] error status/mask=00001000/00100000
e1000e 0000:28:00.0: [12] Poisoned TLP (First)
e1000e 0000:28:00.0: TLP Header: 00000000 00000001 00000002 00000003
e1000e 0000:28:00.1: PCIE Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, id=2801(Receiver ID)
e1000e 0000:28:00.1: device [8086:1096] error status/mask=00081000/00100000
e1000e 0000:28:00.1: [12] Poisoned TLP (First)
e1000e 0000:28:00.1: [19] ECRC
e1000e 0000:28:00.1: TLP Header: 00000000 00000001 00000002 00000003
e1000e 0000:28:00.1: Error of this Agent(2801) is reported first
e1000e 0000:28:00.0: broadcast error_detected message
e1000e 0000:28:00.0: broadcast slot_reset message
e1000e 0000:28:00.0: setting latency timer to 64
e1000e 0000:28:00.0: restoring config space at offset 0x1 (was 0x100547, writing 0x100147)
e1000e 0000:28:00.0: PME# disabled
e1000e 0000:28:00.0: PME# disabled
e1000e 0000:28:00.1: setting latency timer to 64
e1000e 0000:28:00.1: restoring config space at offset 0x1 (was 0x100547, writing 0x100147)
e1000e 0000:28:00.1: PME# disabled
e1000e 0000:28:00.1: PME# disabled
e1000e 0000:28:00.0: broadcast resume message
e1000e 0000:28:00.0: AER driver successfully recovered
e1000e: eth0 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv.h | 2 | ||||
-rw-r--r-- | drivers/pci/pcie/aer/aerdrv_core.c | 29 |
2 files changed, 18 insertions, 13 deletions
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index f9979eb56fb2..bd833ea3ba49 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h | |||
@@ -29,8 +29,6 @@ | |||
29 | #define ERR_COR_ID(d) (d & 0xffff) | 29 | #define ERR_COR_ID(d) (d & 0xffff) |
30 | #define ERR_UNCOR_ID(d) (d >> 16) | 30 | #define ERR_UNCOR_ID(d) (d >> 16) |
31 | 31 | ||
32 | #define AER_SUCCESS 0 | ||
33 | #define AER_UNSUCCESS 1 | ||
34 | #define AER_ERROR_SOURCES_MAX 100 | 32 | #define AER_ERROR_SOURCES_MAX 100 |
35 | 33 | ||
36 | #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ | 34 | #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ |
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index d9185cd2d900..9f5ccbeb4fa5 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c | |||
@@ -696,6 +696,13 @@ static struct aer_err_source *get_e_source(struct aer_rpc *rpc) | |||
696 | return e_source; | 696 | return e_source; |
697 | } | 697 | } |
698 | 698 | ||
699 | /** | ||
700 | * get_device_error_info - read error status from dev and store it to info | ||
701 | * @dev: pointer to the device expected to have a error record | ||
702 | * @info: pointer to structure to store the error record | ||
703 | * | ||
704 | * Return 1 on success, 0 on error. | ||
705 | */ | ||
699 | static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) | 706 | static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) |
700 | { | 707 | { |
701 | int pos, temp; | 708 | int pos, temp; |
@@ -707,7 +714,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) | |||
707 | 714 | ||
708 | /* The device might not support AER */ | 715 | /* The device might not support AER */ |
709 | if (!pos) | 716 | if (!pos) |
710 | return AER_SUCCESS; | 717 | return 1; |
711 | 718 | ||
712 | if (info->severity == AER_CORRECTABLE) { | 719 | if (info->severity == AER_CORRECTABLE) { |
713 | pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, | 720 | pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, |
@@ -715,7 +722,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) | |||
715 | pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, | 722 | pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, |
716 | &info->mask); | 723 | &info->mask); |
717 | if (!(info->status & ~info->mask)) | 724 | if (!(info->status & ~info->mask)) |
718 | return AER_UNSUCCESS; | 725 | return 0; |
719 | } else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE || | 726 | } else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE || |
720 | info->severity == AER_NONFATAL) { | 727 | info->severity == AER_NONFATAL) { |
721 | 728 | ||
@@ -725,7 +732,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) | |||
725 | pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, | 732 | pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, |
726 | &info->mask); | 733 | &info->mask); |
727 | if (!(info->status & ~info->mask)) | 734 | if (!(info->status & ~info->mask)) |
728 | return AER_UNSUCCESS; | 735 | return 0; |
729 | 736 | ||
730 | /* Get First Error Pointer */ | 737 | /* Get First Error Pointer */ |
731 | pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp); | 738 | pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp); |
@@ -744,7 +751,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) | |||
744 | } | 751 | } |
745 | } | 752 | } |
746 | 753 | ||
747 | return AER_SUCCESS; | 754 | return 1; |
748 | } | 755 | } |
749 | 756 | ||
750 | static inline void aer_process_err_devices(struct pcie_device *p_device, | 757 | static inline void aer_process_err_devices(struct pcie_device *p_device, |
@@ -758,14 +765,14 @@ static inline void aer_process_err_devices(struct pcie_device *p_device, | |||
758 | e_info->id); | 765 | e_info->id); |
759 | } | 766 | } |
760 | 767 | ||
768 | /* Report all before handle them, not to lost records by reset etc. */ | ||
761 | for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { | 769 | for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { |
762 | if (get_device_error_info(e_info->dev[i], e_info) == | 770 | if (get_device_error_info(e_info->dev[i], e_info)) |
763 | AER_SUCCESS) { | ||
764 | aer_print_error(e_info->dev[i], e_info); | 771 | aer_print_error(e_info->dev[i], e_info); |
765 | handle_error_source(p_device, | 772 | } |
766 | e_info->dev[i], | 773 | for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { |
767 | e_info); | 774 | if (get_device_error_info(e_info->dev[i], e_info)) |
768 | } | 775 | handle_error_source(p_device, e_info->dev[i], e_info); |
769 | } | 776 | } |
770 | } | 777 | } |
771 | 778 | ||
@@ -870,5 +877,5 @@ int aer_init(struct pcie_device *dev) | |||
870 | if (aer_osc_setup(dev) && !forceload) | 877 | if (aer_osc_setup(dev) && !forceload) |
871 | return -ENXIO; | 878 | return -ENXIO; |
872 | 879 | ||
873 | return AER_SUCCESS; | 880 | return 0; |
874 | } | 881 | } |