diff options
author | Eric Moore <eric.moore@lsi.com> | 2010-07-08 16:44:34 -0400 |
---|---|---|
committer | James Bottomley <James.Bottomley@suse.de> | 2010-07-27 13:04:03 -0400 |
commit | 3cb5469a2ab4b87a7c63dd218fdc1625bc73eccc (patch) | |
tree | 65b46a61abf39654b8a367ff40c1ec880ad606fa /drivers/scsi/mpt2sas/mpt2sas_scsih.c | |
parent | a2f1d139df42df6f3a2641591dea9e068b68f68c (diff) |
[SCSI] mpt2sas: driver fails to recover from injected PCIe bus errors
This patch contains fixes surrounding PCIe enhanced error handling (EEH):
(1) We need to reject all requests generated internally inside the driver, as well
as requests arriving from the SCSI mid layer, when PCIe EEH is active. The fix is
to add a per-adapter flag called pci_error_recovery, which is checked throughout
the driver whenever requests are generated.
(2) We don't need to call pci_driver->remove directly from the PCIe
callbacks because it's already called from the PCIe EEH code. In its place we
shut down the watchdog timer and flush back all pending I/O.
(3) We need to save and restore the pci state across PCIe EEH handling.
Signed-off-by: Eric Moore <eric.moore@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/scsi/mpt2sas/mpt2sas_scsih.c')
-rw-r--r-- | drivers/scsi/mpt2sas/mpt2sas_scsih.c | 47 |
1 files changed, 35 insertions, 12 deletions
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index 854cc91e7aac..6273abd0535e 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c | |||
@@ -1997,7 +1997,8 @@ mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint channel, | |||
1997 | goto err_out; | 1997 | goto err_out; |
1998 | } | 1998 | } |
1999 | 1999 | ||
2000 | if (ioc->shost_recovery || ioc->remove_host) { | 2000 | if (ioc->shost_recovery || ioc->remove_host || |
2001 | ioc->pci_error_recovery) { | ||
2001 | printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n", | 2002 | printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n", |
2002 | __func__, ioc->name); | 2003 | __func__, ioc->name); |
2003 | rc = FAILED; | 2004 | rc = FAILED; |
@@ -2644,7 +2645,8 @@ _scsih_tm_tr_send(struct MPT2SAS_ADAPTER *ioc, u16 handle) | |||
2644 | unsigned long flags; | 2645 | unsigned long flags; |
2645 | struct _tr_list *delayed_tr; | 2646 | struct _tr_list *delayed_tr; |
2646 | 2647 | ||
2647 | if (ioc->shost_recovery || ioc->remove_host) { | 2648 | if (ioc->shost_recovery || ioc->remove_host || |
2649 | ioc->pci_error_recovery) { | ||
2648 | dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " | 2650 | dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " |
2649 | "progress!\n", __func__, ioc->name)); | 2651 | "progress!\n", __func__, ioc->name)); |
2650 | return; | 2652 | return; |
@@ -2742,7 +2744,8 @@ _scsih_tm_tr_volume_send(struct MPT2SAS_ADAPTER *ioc, u16 handle) | |||
2742 | u16 smid; | 2744 | u16 smid; |
2743 | struct _tr_list *delayed_tr; | 2745 | struct _tr_list *delayed_tr; |
2744 | 2746 | ||
2745 | if (ioc->shost_recovery || ioc->remove_host) { | 2747 | if (ioc->shost_recovery || ioc->remove_host || |
2748 | ioc->pci_error_recovery) { | ||
2746 | dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " | 2749 | dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " |
2747 | "progress!\n", __func__, ioc->name)); | 2750 | "progress!\n", __func__, ioc->name)); |
2748 | return; | 2751 | return; |
@@ -2793,7 +2796,8 @@ _scsih_tm_volume_tr_complete(struct MPT2SAS_ADAPTER *ioc, u16 smid, | |||
2793 | Mpi2SCSITaskManagementReply_t *mpi_reply = | 2796 | Mpi2SCSITaskManagementReply_t *mpi_reply = |
2794 | mpt2sas_base_get_reply_virt_addr(ioc, reply); | 2797 | mpt2sas_base_get_reply_virt_addr(ioc, reply); |
2795 | 2798 | ||
2796 | if (ioc->shost_recovery || ioc->remove_host) { | 2799 | if (ioc->shost_recovery || ioc->remove_host || |
2800 | ioc->pci_error_recovery) { | ||
2797 | dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " | 2801 | dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " |
2798 | "progress!\n", __func__, ioc->name)); | 2802 | "progress!\n", __func__, ioc->name)); |
2799 | return 1; | 2803 | return 1; |
@@ -2845,7 +2849,8 @@ _scsih_tm_tr_complete(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, | |||
2845 | Mpi2SasIoUnitControlRequest_t *mpi_request; | 2849 | Mpi2SasIoUnitControlRequest_t *mpi_request; |
2846 | u16 smid_sas_ctrl; | 2850 | u16 smid_sas_ctrl; |
2847 | 2851 | ||
2848 | if (ioc->shost_recovery || ioc->remove_host) { | 2852 | if (ioc->shost_recovery || ioc->remove_host || |
2853 | ioc->pci_error_recovery) { | ||
2849 | dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " | 2854 | dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " |
2850 | "progress!\n", __func__, ioc->name)); | 2855 | "progress!\n", __func__, ioc->name)); |
2851 | return 1; | 2856 | return 1; |
@@ -3187,7 +3192,10 @@ _scsih_flush_running_cmds(struct MPT2SAS_ADAPTER *ioc) | |||
3187 | count++; | 3192 | count++; |
3188 | mpt2sas_base_free_smid(ioc, smid); | 3193 | mpt2sas_base_free_smid(ioc, smid); |
3189 | scsi_dma_unmap(scmd); | 3194 | scsi_dma_unmap(scmd); |
3190 | scmd->result = DID_RESET << 16; | 3195 | if (ioc->pci_error_recovery) |
3196 | scmd->result = DID_NO_CONNECT << 16; | ||
3197 | else | ||
3198 | scmd->result = DID_RESET << 16; | ||
3191 | scmd->scsi_done(scmd); | 3199 | scmd->scsi_done(scmd); |
3192 | } | 3200 | } |
3193 | dtmprintk(ioc, printk(MPT2SAS_INFO_FMT "completing %d cmds\n", | 3201 | dtmprintk(ioc, printk(MPT2SAS_INFO_FMT "completing %d cmds\n", |
@@ -3324,6 +3332,12 @@ _scsih_qcmd(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *)) | |||
3324 | return 0; | 3332 | return 0; |
3325 | } | 3333 | } |
3326 | 3334 | ||
3335 | if (ioc->pci_error_recovery) { | ||
3336 | scmd->result = DID_NO_CONNECT << 16; | ||
3337 | scmd->scsi_done(scmd); | ||
3338 | return 0; | ||
3339 | } | ||
3340 | |||
3327 | sas_target_priv_data = sas_device_priv_data->sas_target; | 3341 | sas_target_priv_data = sas_device_priv_data->sas_target; |
3328 | /* invalid device handle */ | 3342 | /* invalid device handle */ |
3329 | if (sas_target_priv_data->handle == MPT2SAS_INVALID_DEVICE_HANDLE) { | 3343 | if (sas_target_priv_data->handle == MPT2SAS_INVALID_DEVICE_HANDLE) { |
@@ -4156,7 +4170,7 @@ _scsih_expander_add(struct MPT2SAS_ADAPTER *ioc, u16 handle) | |||
4156 | if (!handle) | 4170 | if (!handle) |
4157 | return -1; | 4171 | return -1; |
4158 | 4172 | ||
4159 | if (ioc->shost_recovery) | 4173 | if (ioc->shost_recovery || ioc->pci_error_recovery) |
4160 | return -1; | 4174 | return -1; |
4161 | 4175 | ||
4162 | if ((mpt2sas_config_get_expander_pg0(ioc, &mpi_reply, &expander_pg0, | 4176 | if ((mpt2sas_config_get_expander_pg0(ioc, &mpi_reply, &expander_pg0, |
@@ -4734,7 +4748,7 @@ _scsih_sas_topology_change_event(struct MPT2SAS_ADAPTER *ioc, | |||
4734 | _scsih_sas_topology_change_event_debug(ioc, event_data); | 4748 | _scsih_sas_topology_change_event_debug(ioc, event_data); |
4735 | #endif | 4749 | #endif |
4736 | 4750 | ||
4737 | if (ioc->shost_recovery || ioc->remove_host) | 4751 | if (ioc->shost_recovery || ioc->remove_host || ioc->pci_error_recovery) |
4738 | return; | 4752 | return; |
4739 | 4753 | ||
4740 | if (!ioc->sas_hba.num_phys) | 4754 | if (!ioc->sas_hba.num_phys) |
@@ -4773,7 +4787,8 @@ _scsih_sas_topology_change_event(struct MPT2SAS_ADAPTER *ioc, | |||
4773 | "expander event\n", ioc->name)); | 4787 | "expander event\n", ioc->name)); |
4774 | return; | 4788 | return; |
4775 | } | 4789 | } |
4776 | if (ioc->shost_recovery || ioc->remove_host) | 4790 | if (ioc->shost_recovery || ioc->remove_host || |
4791 | ioc->pci_error_recovery) | ||
4777 | return; | 4792 | return; |
4778 | phy_number = event_data->StartPhyNum + i; | 4793 | phy_number = event_data->StartPhyNum + i; |
4779 | reason_code = event_data->PHY[i].PhyStatus & | 4794 | reason_code = event_data->PHY[i].PhyStatus & |
@@ -6273,7 +6288,8 @@ _firmware_event_work(struct work_struct *work) | |||
6273 | struct MPT2SAS_ADAPTER *ioc = fw_event->ioc; | 6288 | struct MPT2SAS_ADAPTER *ioc = fw_event->ioc; |
6274 | 6289 | ||
6275 | /* the queue is being flushed so ignore this event */ | 6290 | /* the queue is being flushed so ignore this event */ |
6276 | if (ioc->remove_host || fw_event->cancel_pending_work) { | 6291 | if (ioc->remove_host || fw_event->cancel_pending_work || |
6292 | ioc->pci_error_recovery) { | ||
6277 | _scsih_fw_event_free(ioc, fw_event); | 6293 | _scsih_fw_event_free(ioc, fw_event); |
6278 | return; | 6294 | return; |
6279 | } | 6295 | } |
@@ -6355,7 +6371,7 @@ mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index, | |||
6355 | u16 sz; | 6371 | u16 sz; |
6356 | 6372 | ||
6357 | /* events turned off due to host reset or driver unloading */ | 6373 | /* events turned off due to host reset or driver unloading */ |
6358 | if (ioc->remove_host) | 6374 | if (ioc->remove_host || ioc->pci_error_recovery) |
6359 | return 1; | 6375 | return 1; |
6360 | 6376 | ||
6361 | mpi_reply = mpt2sas_base_get_reply_virt_addr(ioc, reply); | 6377 | mpi_reply = mpt2sas_base_get_reply_virt_addr(ioc, reply); |
@@ -7058,12 +7074,17 @@ _scsih_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) | |||
7058 | case pci_channel_io_normal: | 7074 | case pci_channel_io_normal: |
7059 | return PCI_ERS_RESULT_CAN_RECOVER; | 7075 | return PCI_ERS_RESULT_CAN_RECOVER; |
7060 | case pci_channel_io_frozen: | 7076 | case pci_channel_io_frozen: |
7077 | /* Fatal error, prepare for slot reset */ | ||
7078 | ioc->pci_error_recovery = 1; | ||
7061 | scsi_block_requests(ioc->shost); | 7079 | scsi_block_requests(ioc->shost); |
7062 | mpt2sas_base_stop_watchdog(ioc); | 7080 | mpt2sas_base_stop_watchdog(ioc); |
7063 | mpt2sas_base_free_resources(ioc); | 7081 | mpt2sas_base_free_resources(ioc); |
7064 | return PCI_ERS_RESULT_NEED_RESET; | 7082 | return PCI_ERS_RESULT_NEED_RESET; |
7065 | case pci_channel_io_perm_failure: | 7083 | case pci_channel_io_perm_failure: |
7066 | _scsih_remove(pdev); | 7084 | /* Permanent error, prepare for device removal */ |
7085 | ioc->pci_error_recovery = 1; | ||
7086 | mpt2sas_base_stop_watchdog(ioc); | ||
7087 | _scsih_flush_running_cmds(ioc); | ||
7067 | return PCI_ERS_RESULT_DISCONNECT; | 7088 | return PCI_ERS_RESULT_DISCONNECT; |
7068 | } | 7089 | } |
7069 | return PCI_ERS_RESULT_NEED_RESET; | 7090 | return PCI_ERS_RESULT_NEED_RESET; |
@@ -7087,7 +7108,9 @@ _scsih_pci_slot_reset(struct pci_dev *pdev) | |||
7087 | printk(MPT2SAS_INFO_FMT "PCI error: slot reset callback!!\n", | 7108 | printk(MPT2SAS_INFO_FMT "PCI error: slot reset callback!!\n", |
7088 | ioc->name); | 7109 | ioc->name); |
7089 | 7110 | ||
7111 | ioc->pci_error_recovery = 0; | ||
7090 | ioc->pdev = pdev; | 7112 | ioc->pdev = pdev; |
7113 | pci_restore_state(pdev); | ||
7091 | rc = mpt2sas_base_map_resources(ioc); | 7114 | rc = mpt2sas_base_map_resources(ioc); |
7092 | if (rc) | 7115 | if (rc) |
7093 | return PCI_ERS_RESULT_DISCONNECT; | 7116 | return PCI_ERS_RESULT_DISCONNECT; |