aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi/mpt2sas/mpt2sas_scsih.c
diff options
context:
space:
mode:
authorEric Moore <eric.moore@lsi.com>2010-07-08 16:44:34 -0400
committerJames Bottomley <James.Bottomley@suse.de>2010-07-27 13:04:03 -0400
commit3cb5469a2ab4b87a7c63dd218fdc1625bc73eccc (patch)
tree65b46a61abf39654b8a367ff40c1ec880ad606fa /drivers/scsi/mpt2sas/mpt2sas_scsih.c
parenta2f1d139df42df6f3a2641591dea9e068b68f68c (diff)
[SCSI] mpt2sas: driver fails to recover from injected PCIe bus errors
Fixes surrounding PCIe enhanced error handling (EEH): (1) We need to reject all requests generated internally inside the driver, as well as requests arriving from the SCSI mid layer, when PCIe EEH is active. The fix is to add a per-adapter flag called pci_error_recovery, which is checked throughout the driver whenever requests are generated. (2) We don't need to call pci_driver->remove directly from the PCIe callbacks because it is already called from the PCIe EEH code. In its place we are shutting down the watchdog timer and flushing back all pending IO. (3) We need to save and restore the PCI state across PCIe EEH handling. Signed-off-by: Eric Moore <eric.moore@lsi.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/scsi/mpt2sas/mpt2sas_scsih.c')
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_scsih.c47
1 files changed, 35 insertions, 12 deletions
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 854cc91e7aac..6273abd0535e 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -1997,7 +1997,8 @@ mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint channel,
1997 goto err_out; 1997 goto err_out;
1998 } 1998 }
1999 1999
2000 if (ioc->shost_recovery || ioc->remove_host) { 2000 if (ioc->shost_recovery || ioc->remove_host ||
2001 ioc->pci_error_recovery) {
2001 printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n", 2002 printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n",
2002 __func__, ioc->name); 2003 __func__, ioc->name);
2003 rc = FAILED; 2004 rc = FAILED;
@@ -2644,7 +2645,8 @@ _scsih_tm_tr_send(struct MPT2SAS_ADAPTER *ioc, u16 handle)
2644 unsigned long flags; 2645 unsigned long flags;
2645 struct _tr_list *delayed_tr; 2646 struct _tr_list *delayed_tr;
2646 2647
2647 if (ioc->shost_recovery || ioc->remove_host) { 2648 if (ioc->shost_recovery || ioc->remove_host ||
2649 ioc->pci_error_recovery) {
2648 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " 2650 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in "
2649 "progress!\n", __func__, ioc->name)); 2651 "progress!\n", __func__, ioc->name));
2650 return; 2652 return;
@@ -2742,7 +2744,8 @@ _scsih_tm_tr_volume_send(struct MPT2SAS_ADAPTER *ioc, u16 handle)
2742 u16 smid; 2744 u16 smid;
2743 struct _tr_list *delayed_tr; 2745 struct _tr_list *delayed_tr;
2744 2746
2745 if (ioc->shost_recovery || ioc->remove_host) { 2747 if (ioc->shost_recovery || ioc->remove_host ||
2748 ioc->pci_error_recovery) {
2746 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " 2749 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in "
2747 "progress!\n", __func__, ioc->name)); 2750 "progress!\n", __func__, ioc->name));
2748 return; 2751 return;
@@ -2793,7 +2796,8 @@ _scsih_tm_volume_tr_complete(struct MPT2SAS_ADAPTER *ioc, u16 smid,
2793 Mpi2SCSITaskManagementReply_t *mpi_reply = 2796 Mpi2SCSITaskManagementReply_t *mpi_reply =
2794 mpt2sas_base_get_reply_virt_addr(ioc, reply); 2797 mpt2sas_base_get_reply_virt_addr(ioc, reply);
2795 2798
2796 if (ioc->shost_recovery || ioc->remove_host) { 2799 if (ioc->shost_recovery || ioc->remove_host ||
2800 ioc->pci_error_recovery) {
2797 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " 2801 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in "
2798 "progress!\n", __func__, ioc->name)); 2802 "progress!\n", __func__, ioc->name));
2799 return 1; 2803 return 1;
@@ -2845,7 +2849,8 @@ _scsih_tm_tr_complete(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
2845 Mpi2SasIoUnitControlRequest_t *mpi_request; 2849 Mpi2SasIoUnitControlRequest_t *mpi_request;
2846 u16 smid_sas_ctrl; 2850 u16 smid_sas_ctrl;
2847 2851
2848 if (ioc->shost_recovery || ioc->remove_host) { 2852 if (ioc->shost_recovery || ioc->remove_host ||
2853 ioc->pci_error_recovery) {
2849 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " 2854 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in "
2850 "progress!\n", __func__, ioc->name)); 2855 "progress!\n", __func__, ioc->name));
2851 return 1; 2856 return 1;
@@ -3187,7 +3192,10 @@ _scsih_flush_running_cmds(struct MPT2SAS_ADAPTER *ioc)
3187 count++; 3192 count++;
3188 mpt2sas_base_free_smid(ioc, smid); 3193 mpt2sas_base_free_smid(ioc, smid);
3189 scsi_dma_unmap(scmd); 3194 scsi_dma_unmap(scmd);
3190 scmd->result = DID_RESET << 16; 3195 if (ioc->pci_error_recovery)
3196 scmd->result = DID_NO_CONNECT << 16;
3197 else
3198 scmd->result = DID_RESET << 16;
3191 scmd->scsi_done(scmd); 3199 scmd->scsi_done(scmd);
3192 } 3200 }
3193 dtmprintk(ioc, printk(MPT2SAS_INFO_FMT "completing %d cmds\n", 3201 dtmprintk(ioc, printk(MPT2SAS_INFO_FMT "completing %d cmds\n",
@@ -3324,6 +3332,12 @@ _scsih_qcmd(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
3324 return 0; 3332 return 0;
3325 } 3333 }
3326 3334
3335 if (ioc->pci_error_recovery) {
3336 scmd->result = DID_NO_CONNECT << 16;
3337 scmd->scsi_done(scmd);
3338 return 0;
3339 }
3340
3327 sas_target_priv_data = sas_device_priv_data->sas_target; 3341 sas_target_priv_data = sas_device_priv_data->sas_target;
3328 /* invalid device handle */ 3342 /* invalid device handle */
3329 if (sas_target_priv_data->handle == MPT2SAS_INVALID_DEVICE_HANDLE) { 3343 if (sas_target_priv_data->handle == MPT2SAS_INVALID_DEVICE_HANDLE) {
@@ -4156,7 +4170,7 @@ _scsih_expander_add(struct MPT2SAS_ADAPTER *ioc, u16 handle)
4156 if (!handle) 4170 if (!handle)
4157 return -1; 4171 return -1;
4158 4172
4159 if (ioc->shost_recovery) 4173 if (ioc->shost_recovery || ioc->pci_error_recovery)
4160 return -1; 4174 return -1;
4161 4175
4162 if ((mpt2sas_config_get_expander_pg0(ioc, &mpi_reply, &expander_pg0, 4176 if ((mpt2sas_config_get_expander_pg0(ioc, &mpi_reply, &expander_pg0,
@@ -4734,7 +4748,7 @@ _scsih_sas_topology_change_event(struct MPT2SAS_ADAPTER *ioc,
4734 _scsih_sas_topology_change_event_debug(ioc, event_data); 4748 _scsih_sas_topology_change_event_debug(ioc, event_data);
4735#endif 4749#endif
4736 4750
4737 if (ioc->shost_recovery || ioc->remove_host) 4751 if (ioc->shost_recovery || ioc->remove_host || ioc->pci_error_recovery)
4738 return; 4752 return;
4739 4753
4740 if (!ioc->sas_hba.num_phys) 4754 if (!ioc->sas_hba.num_phys)
@@ -4773,7 +4787,8 @@ _scsih_sas_topology_change_event(struct MPT2SAS_ADAPTER *ioc,
4773 "expander event\n", ioc->name)); 4787 "expander event\n", ioc->name));
4774 return; 4788 return;
4775 } 4789 }
4776 if (ioc->shost_recovery || ioc->remove_host) 4790 if (ioc->shost_recovery || ioc->remove_host ||
4791 ioc->pci_error_recovery)
4777 return; 4792 return;
4778 phy_number = event_data->StartPhyNum + i; 4793 phy_number = event_data->StartPhyNum + i;
4779 reason_code = event_data->PHY[i].PhyStatus & 4794 reason_code = event_data->PHY[i].PhyStatus &
@@ -6273,7 +6288,8 @@ _firmware_event_work(struct work_struct *work)
6273 struct MPT2SAS_ADAPTER *ioc = fw_event->ioc; 6288 struct MPT2SAS_ADAPTER *ioc = fw_event->ioc;
6274 6289
6275 /* the queue is being flushed so ignore this event */ 6290 /* the queue is being flushed so ignore this event */
6276 if (ioc->remove_host || fw_event->cancel_pending_work) { 6291 if (ioc->remove_host || fw_event->cancel_pending_work ||
6292 ioc->pci_error_recovery) {
6277 _scsih_fw_event_free(ioc, fw_event); 6293 _scsih_fw_event_free(ioc, fw_event);
6278 return; 6294 return;
6279 } 6295 }
@@ -6355,7 +6371,7 @@ mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index,
6355 u16 sz; 6371 u16 sz;
6356 6372
6357 /* events turned off due to host reset or driver unloading */ 6373 /* events turned off due to host reset or driver unloading */
6358 if (ioc->remove_host) 6374 if (ioc->remove_host || ioc->pci_error_recovery)
6359 return 1; 6375 return 1;
6360 6376
6361 mpi_reply = mpt2sas_base_get_reply_virt_addr(ioc, reply); 6377 mpi_reply = mpt2sas_base_get_reply_virt_addr(ioc, reply);
@@ -7058,12 +7074,17 @@ _scsih_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
7058 case pci_channel_io_normal: 7074 case pci_channel_io_normal:
7059 return PCI_ERS_RESULT_CAN_RECOVER; 7075 return PCI_ERS_RESULT_CAN_RECOVER;
7060 case pci_channel_io_frozen: 7076 case pci_channel_io_frozen:
7077 /* Fatal error, prepare for slot reset */
7078 ioc->pci_error_recovery = 1;
7061 scsi_block_requests(ioc->shost); 7079 scsi_block_requests(ioc->shost);
7062 mpt2sas_base_stop_watchdog(ioc); 7080 mpt2sas_base_stop_watchdog(ioc);
7063 mpt2sas_base_free_resources(ioc); 7081 mpt2sas_base_free_resources(ioc);
7064 return PCI_ERS_RESULT_NEED_RESET; 7082 return PCI_ERS_RESULT_NEED_RESET;
7065 case pci_channel_io_perm_failure: 7083 case pci_channel_io_perm_failure:
7066 _scsih_remove(pdev); 7084 /* Permanent error, prepare for device removal */
7085 ioc->pci_error_recovery = 1;
7086 mpt2sas_base_stop_watchdog(ioc);
7087 _scsih_flush_running_cmds(ioc);
7067 return PCI_ERS_RESULT_DISCONNECT; 7088 return PCI_ERS_RESULT_DISCONNECT;
7068 } 7089 }
7069 return PCI_ERS_RESULT_NEED_RESET; 7090 return PCI_ERS_RESULT_NEED_RESET;
@@ -7087,7 +7108,9 @@ _scsih_pci_slot_reset(struct pci_dev *pdev)
7087 printk(MPT2SAS_INFO_FMT "PCI error: slot reset callback!!\n", 7108 printk(MPT2SAS_INFO_FMT "PCI error: slot reset callback!!\n",
7088 ioc->name); 7109 ioc->name);
7089 7110
7111 ioc->pci_error_recovery = 0;
7090 ioc->pdev = pdev; 7112 ioc->pdev = pdev;
7113 pci_restore_state(pdev);
7091 rc = mpt2sas_base_map_resources(ioc); 7114 rc = mpt2sas_base_map_resources(ioc);
7092 if (rc) 7115 if (rc)
7093 return PCI_ERS_RESULT_DISCONNECT; 7116 return PCI_ERS_RESULT_DISCONNECT;