aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/pci/pcie
diff options
context:
space:
mode:
author: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com> 2012-11-17 06:47:18 -0500
committer: Bjorn Helgaas <bhelgaas@google.com> 2012-11-26 16:46:28 -0500
commit: 918b4053184c0ca22236e70e299c5343eea35304 (patch)
tree: 7597202f834b5e8c7a42c226894215cbeeb9ef91 /drivers/pci/pcie
parent: 71fbad6c9a28629b6af40b0ff48f36c6610a1394 (diff)
PCI/AER: Report success only when every device has AER-aware driver
When an error is detected on a PCIe device which does not have an AER-aware driver, prevent the AER infrastructure from reporting successful error recovery. This is needed because the report_error_detected() function, which gets called in the first phase of the recovery process, allows forward progress even when the driver for the device does not have AER capabilities. It seems that none of the callbacks (in the pci_error_handlers structure) that drivers register and that get called during error recovery are mandatory. So the intention of the infrastructure design seems to be to allow forward progress even when a specific callback has not been registered by a driver. However, if the error handler structure itself has not been registered, it doesn't make sense to allow forward progress. As a result of the current design, in the case of a single device having an AER-unaware driver or in the case of any function in a multi-function card having an AER-unaware driver, a successful recovery is reported. A typical scenario in which this happens is when a PCI device is detached from a KVM host and the pci-stub driver on the host claims the device. The pci-stub driver does not have error handling capabilities, but the AER infrastructure still reports that the device recovered successfully. The changes proposed here leave the device(s) in an unrecovered state if the driver for the device or for any device in the subtree does not have an error handler structure registered. This reflects the true state of the device and prevents any partial recovery (or no recovery at all) from being reported as successful. [bhelgaas: changelog] Signed-off-by: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Linas Vepstas <linasvepstas@gmail.com> Reviewed-by: Myron Stowe <myron.stowe@redhat.com>
Diffstat (limited to 'drivers/pci/pcie')
-rw-r--r-- drivers/pci/pcie/aer/aerdrv.h 5
-rw-r--r-- drivers/pci/pcie/aer/aerdrv_core.c 21
2 files changed, 22 insertions, 4 deletions
diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h
index 94a7598eb262..22f840f4dda1 100644
--- a/drivers/pci/pcie/aer/aerdrv.h
+++ b/drivers/pci/pcie/aer/aerdrv.h
@@ -87,6 +87,9 @@ struct aer_broadcast_data {
87static inline pci_ers_result_t merge_result(enum pci_ers_result orig, 87static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
88 enum pci_ers_result new) 88 enum pci_ers_result new)
89{ 89{
90 if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
91 return PCI_ERS_RESULT_NO_AER_DRIVER;
92
90 if (new == PCI_ERS_RESULT_NONE) 93 if (new == PCI_ERS_RESULT_NONE)
91 return orig; 94 return orig;
92 95
@@ -97,7 +100,7 @@ static inline pci_ers_result_t merge_result(enum pci_ers_result orig,
97 break; 100 break;
98 case PCI_ERS_RESULT_DISCONNECT: 101 case PCI_ERS_RESULT_DISCONNECT:
99 if (new == PCI_ERS_RESULT_NEED_RESET) 102 if (new == PCI_ERS_RESULT_NEED_RESET)
100 orig = new; 103 orig = PCI_ERS_RESULT_NEED_RESET;
101 break; 104 break;
102 default: 105 default:
103 break; 106 break;
diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c
index 06bad96af415..eb2f19a9c3cd 100644
--- a/drivers/pci/pcie/aer/aerdrv_core.c
+++ b/drivers/pci/pcie/aer/aerdrv_core.c
@@ -231,11 +231,26 @@ static int report_error_detected(struct pci_dev *dev, void *data)
231 dev->driver ? 231 dev->driver ?
232 "no AER-aware driver" : "no driver"); 232 "no AER-aware driver" : "no driver");
233 } 233 }
234 return 0; 234
235 /*
236 * If there's any device in the subtree that does not
237 * have an error_detected callback, returning
238 * PCI_ERS_RESULT_NO_AER_DRIVER prevents calling of
239 * the subsequent mmio_enabled/slot_reset/resume
240 * callbacks of "any" device in the subtree. All the
241 * devices in the subtree are left in the error state
242 * without recovery.
243 */
244
245 if (!(dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
246 vote = PCI_ERS_RESULT_NO_AER_DRIVER;
247 else
248 vote = PCI_ERS_RESULT_NONE;
249 } else {
250 err_handler = dev->driver->err_handler;
251 vote = err_handler->error_detected(dev, result_data->state);
235 } 252 }
236 253
237 err_handler = dev->driver->err_handler;
238 vote = err_handler->error_detected(dev, result_data->state);
239 result_data->result = merge_result(result_data->result, vote); 254 result_data->result = merge_result(result_data->result, vote);
240 return 0; 255 return 0;
241} 256}