aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi/mpt2sas
diff options
context:
space:
mode:
authorEric Moore <eric.moore@lsi.com>2010-07-08 16:44:34 -0400
committerJames Bottomley <James.Bottomley@suse.de>2010-07-27 13:04:03 -0400
commit3cb5469a2ab4b87a7c63dd218fdc1625bc73eccc (patch)
tree65b46a61abf39654b8a367ff40c1ec880ad606fa /drivers/scsi/mpt2sas
parenta2f1d139df42df6f3a2641591dea9e068b68f68c (diff)
[SCSI] mpt2sas: driver fails to recover from injected PCIe bus errors
Fixes surrounding PCIe enhanced error handling (EEH): (1) We need to reject all requests generated internally inside the driver, as well as requests arriving from the SCSI mid layer, while PCIe EEH is active. The fix is to add a per-adapter flag called pci_error_recovery, which is checked throughout the driver whenever requests are generated. (2) We don't need to call pci_driver->remove directly from the PCIe callbacks because it is already called from the PCIe EEH code. In its place we shut down the watchdog timer and flush back all pending IO. (3) We need to save and restore the PCI state across PCIe EEH handling. Signed-off-by: Eric Moore <eric.moore@lsi.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/scsi/mpt2sas')
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_base.c13
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_base.h2
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_config.c2
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_ctl.c4
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_scsih.c47
-rw-r--r--drivers/scsi/mpt2sas/mpt2sas_transport.c8
6 files changed, 57 insertions, 19 deletions
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index 1f22a764927a..57bcd5c9dcff 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
@@ -1311,6 +1311,9 @@ mpt2sas_base_map_resources(struct MPT2SAS_ADAPTER *ioc)
1311 printk(MPT2SAS_INFO_FMT "ioport(0x%016llx), size(%d)\n", 1311 printk(MPT2SAS_INFO_FMT "ioport(0x%016llx), size(%d)\n",
1312 ioc->name, (unsigned long long)pio_chip, pio_sz); 1312 ioc->name, (unsigned long long)pio_chip, pio_sz);
1313 1313
1314 /* Save PCI configuration state for recovery from PCI AER/EEH errors */
1315 pci_save_state(pdev);
1316
1314 return 0; 1317 return 0;
1315 1318
1316 out_fail: 1319 out_fail:
@@ -3407,6 +3410,9 @@ _base_make_ioc_ready(struct MPT2SAS_ADAPTER *ioc, int sleep_flag,
3407 dinitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, 3410 dinitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name,
3408 __func__)); 3411 __func__));
3409 3412
3413 if (ioc->pci_error_recovery)
3414 return 0;
3415
3410 ioc_state = mpt2sas_base_get_iocstate(ioc, 0); 3416 ioc_state = mpt2sas_base_get_iocstate(ioc, 0);
3411 dhsprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: ioc_state(0x%08x)\n", 3417 dhsprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: ioc_state(0x%08x)\n",
3412 ioc->name, __func__, ioc_state)); 3418 ioc->name, __func__, ioc_state));
@@ -3869,6 +3875,13 @@ mpt2sas_base_hard_reset_handler(struct MPT2SAS_ADAPTER *ioc, int sleep_flag,
3869 dtmprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: enter\n", ioc->name, 3875 dtmprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: enter\n", ioc->name,
3870 __func__)); 3876 __func__));
3871 3877
3878 if (ioc->pci_error_recovery) {
3879 printk(MPT2SAS_ERR_FMT "%s: pci error recovery reset\n",
3880 ioc->name, __func__);
3881 r = 0;
3882 goto out;
3883 }
3884
3872 if (mpt2sas_fwfault_debug) 3885 if (mpt2sas_fwfault_debug)
3873 mpt2sas_halt_firmware(ioc); 3886 mpt2sas_halt_firmware(ioc);
3874 3887
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h
index 0b0ef5e7899a..0ebef0c0d949 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.h
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.h
@@ -477,6 +477,7 @@ typedef void (*MPT_ADD_SGE)(void *paddr, u32 flags_length, dma_addr_t dma_addr);
477 * @ioc_link_reset_in_progress: phy/hard reset in progress 477 * @ioc_link_reset_in_progress: phy/hard reset in progress
478 * @ignore_loginfos: ignore loginfos during task managment 478 * @ignore_loginfos: ignore loginfos during task managment
479 * @remove_host: flag for when driver unloads, to avoid sending dev resets 479 * @remove_host: flag for when driver unloads, to avoid sending dev resets
480 * @pci_error_recovery: flag to prevent ioc access until slot reset completes
480 * @wait_for_port_enable_to_complete: 481 * @wait_for_port_enable_to_complete:
481 * @msix_enable: flag indicating msix is enabled 482 * @msix_enable: flag indicating msix is enabled
482 * @msix_vector_count: number msix vectors 483 * @msix_vector_count: number msix vectors
@@ -617,6 +618,7 @@ struct MPT2SAS_ADAPTER {
617 618
618 u8 ignore_loginfos; 619 u8 ignore_loginfos;
619 u8 remove_host; 620 u8 remove_host;
621 u8 pci_error_recovery;
620 u8 wait_for_port_enable_to_complete; 622 u8 wait_for_port_enable_to_complete;
621 623
622 u8 msix_enable; 624 u8 msix_enable;
diff --git a/drivers/scsi/mpt2sas/mpt2sas_config.c b/drivers/scsi/mpt2sas/mpt2sas_config.c
index e26f9206a521..6afd67b324fe 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_config.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_config.c
@@ -401,7 +401,7 @@ _config_request(struct MPT2SAS_ADAPTER *ioc, Mpi2ConfigRequest_t
401 if (ioc->config_cmds.smid == smid) 401 if (ioc->config_cmds.smid == smid)
402 mpt2sas_base_free_smid(ioc, smid); 402 mpt2sas_base_free_smid(ioc, smid);
403 if ((ioc->shost_recovery) || (ioc->config_cmds.status & 403 if ((ioc->shost_recovery) || (ioc->config_cmds.status &
404 MPT2_CMD_RESET)) 404 MPT2_CMD_RESET) || ioc->pci_error_recovery)
405 goto retry_config; 405 goto retry_config;
406 issue_host_reset = 1; 406 issue_host_reset = 1;
407 r = -EFAULT; 407 r = -EFAULT;
diff --git a/drivers/scsi/mpt2sas/mpt2sas_ctl.c b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
index 55ac1cb34778..b774973f0765 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_ctl.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
@@ -2156,7 +2156,7 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg)
2156 !ioc) 2156 !ioc)
2157 return -ENODEV; 2157 return -ENODEV;
2158 2158
2159 if (ioc->shost_recovery) 2159 if (ioc->shost_recovery || ioc->pci_error_recovery)
2160 return -EAGAIN; 2160 return -EAGAIN;
2161 2161
2162 if (_IOC_SIZE(cmd) == sizeof(struct mpt2_ioctl_command)) { 2162 if (_IOC_SIZE(cmd) == sizeof(struct mpt2_ioctl_command)) {
@@ -2275,7 +2275,7 @@ _ctl_compat_mpt_command(struct file *file, unsigned cmd, unsigned long arg)
2275 if (_ctl_verify_adapter(karg32.hdr.ioc_number, &ioc) == -1 || !ioc) 2275 if (_ctl_verify_adapter(karg32.hdr.ioc_number, &ioc) == -1 || !ioc)
2276 return -ENODEV; 2276 return -ENODEV;
2277 2277
2278 if (ioc->shost_recovery) 2278 if (ioc->shost_recovery || ioc->pci_error_recovery)
2279 return -EAGAIN; 2279 return -EAGAIN;
2280 2280
2281 memset(&karg, 0, sizeof(struct mpt2_ioctl_command)); 2281 memset(&karg, 0, sizeof(struct mpt2_ioctl_command));
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 854cc91e7aac..6273abd0535e 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -1997,7 +1997,8 @@ mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint channel,
1997 goto err_out; 1997 goto err_out;
1998 } 1998 }
1999 1999
2000 if (ioc->shost_recovery || ioc->remove_host) { 2000 if (ioc->shost_recovery || ioc->remove_host ||
2001 ioc->pci_error_recovery) {
2001 printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n", 2002 printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n",
2002 __func__, ioc->name); 2003 __func__, ioc->name);
2003 rc = FAILED; 2004 rc = FAILED;
@@ -2644,7 +2645,8 @@ _scsih_tm_tr_send(struct MPT2SAS_ADAPTER *ioc, u16 handle)
2644 unsigned long flags; 2645 unsigned long flags;
2645 struct _tr_list *delayed_tr; 2646 struct _tr_list *delayed_tr;
2646 2647
2647 if (ioc->shost_recovery || ioc->remove_host) { 2648 if (ioc->shost_recovery || ioc->remove_host ||
2649 ioc->pci_error_recovery) {
2648 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " 2650 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in "
2649 "progress!\n", __func__, ioc->name)); 2651 "progress!\n", __func__, ioc->name));
2650 return; 2652 return;
@@ -2742,7 +2744,8 @@ _scsih_tm_tr_volume_send(struct MPT2SAS_ADAPTER *ioc, u16 handle)
2742 u16 smid; 2744 u16 smid;
2743 struct _tr_list *delayed_tr; 2745 struct _tr_list *delayed_tr;
2744 2746
2745 if (ioc->shost_recovery || ioc->remove_host) { 2747 if (ioc->shost_recovery || ioc->remove_host ||
2748 ioc->pci_error_recovery) {
2746 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " 2749 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in "
2747 "progress!\n", __func__, ioc->name)); 2750 "progress!\n", __func__, ioc->name));
2748 return; 2751 return;
@@ -2793,7 +2796,8 @@ _scsih_tm_volume_tr_complete(struct MPT2SAS_ADAPTER *ioc, u16 smid,
2793 Mpi2SCSITaskManagementReply_t *mpi_reply = 2796 Mpi2SCSITaskManagementReply_t *mpi_reply =
2794 mpt2sas_base_get_reply_virt_addr(ioc, reply); 2797 mpt2sas_base_get_reply_virt_addr(ioc, reply);
2795 2798
2796 if (ioc->shost_recovery || ioc->remove_host) { 2799 if (ioc->shost_recovery || ioc->remove_host ||
2800 ioc->pci_error_recovery) {
2797 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " 2801 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in "
2798 "progress!\n", __func__, ioc->name)); 2802 "progress!\n", __func__, ioc->name));
2799 return 1; 2803 return 1;
@@ -2845,7 +2849,8 @@ _scsih_tm_tr_complete(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
2845 Mpi2SasIoUnitControlRequest_t *mpi_request; 2849 Mpi2SasIoUnitControlRequest_t *mpi_request;
2846 u16 smid_sas_ctrl; 2850 u16 smid_sas_ctrl;
2847 2851
2848 if (ioc->shost_recovery || ioc->remove_host) { 2852 if (ioc->shost_recovery || ioc->remove_host ||
2853 ioc->pci_error_recovery) {
2849 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in " 2854 dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: host reset in "
2850 "progress!\n", __func__, ioc->name)); 2855 "progress!\n", __func__, ioc->name));
2851 return 1; 2856 return 1;
@@ -3187,7 +3192,10 @@ _scsih_flush_running_cmds(struct MPT2SAS_ADAPTER *ioc)
3187 count++; 3192 count++;
3188 mpt2sas_base_free_smid(ioc, smid); 3193 mpt2sas_base_free_smid(ioc, smid);
3189 scsi_dma_unmap(scmd); 3194 scsi_dma_unmap(scmd);
3190 scmd->result = DID_RESET << 16; 3195 if (ioc->pci_error_recovery)
3196 scmd->result = DID_NO_CONNECT << 16;
3197 else
3198 scmd->result = DID_RESET << 16;
3191 scmd->scsi_done(scmd); 3199 scmd->scsi_done(scmd);
3192 } 3200 }
3193 dtmprintk(ioc, printk(MPT2SAS_INFO_FMT "completing %d cmds\n", 3201 dtmprintk(ioc, printk(MPT2SAS_INFO_FMT "completing %d cmds\n",
@@ -3324,6 +3332,12 @@ _scsih_qcmd(struct scsi_cmnd *scmd, void (*done)(struct scsi_cmnd *))
3324 return 0; 3332 return 0;
3325 } 3333 }
3326 3334
3335 if (ioc->pci_error_recovery) {
3336 scmd->result = DID_NO_CONNECT << 16;
3337 scmd->scsi_done(scmd);
3338 return 0;
3339 }
3340
3327 sas_target_priv_data = sas_device_priv_data->sas_target; 3341 sas_target_priv_data = sas_device_priv_data->sas_target;
3328 /* invalid device handle */ 3342 /* invalid device handle */
3329 if (sas_target_priv_data->handle == MPT2SAS_INVALID_DEVICE_HANDLE) { 3343 if (sas_target_priv_data->handle == MPT2SAS_INVALID_DEVICE_HANDLE) {
@@ -4156,7 +4170,7 @@ _scsih_expander_add(struct MPT2SAS_ADAPTER *ioc, u16 handle)
4156 if (!handle) 4170 if (!handle)
4157 return -1; 4171 return -1;
4158 4172
4159 if (ioc->shost_recovery) 4173 if (ioc->shost_recovery || ioc->pci_error_recovery)
4160 return -1; 4174 return -1;
4161 4175
4162 if ((mpt2sas_config_get_expander_pg0(ioc, &mpi_reply, &expander_pg0, 4176 if ((mpt2sas_config_get_expander_pg0(ioc, &mpi_reply, &expander_pg0,
@@ -4734,7 +4748,7 @@ _scsih_sas_topology_change_event(struct MPT2SAS_ADAPTER *ioc,
4734 _scsih_sas_topology_change_event_debug(ioc, event_data); 4748 _scsih_sas_topology_change_event_debug(ioc, event_data);
4735#endif 4749#endif
4736 4750
4737 if (ioc->shost_recovery || ioc->remove_host) 4751 if (ioc->shost_recovery || ioc->remove_host || ioc->pci_error_recovery)
4738 return; 4752 return;
4739 4753
4740 if (!ioc->sas_hba.num_phys) 4754 if (!ioc->sas_hba.num_phys)
@@ -4773,7 +4787,8 @@ _scsih_sas_topology_change_event(struct MPT2SAS_ADAPTER *ioc,
4773 "expander event\n", ioc->name)); 4787 "expander event\n", ioc->name));
4774 return; 4788 return;
4775 } 4789 }
4776 if (ioc->shost_recovery || ioc->remove_host) 4790 if (ioc->shost_recovery || ioc->remove_host ||
4791 ioc->pci_error_recovery)
4777 return; 4792 return;
4778 phy_number = event_data->StartPhyNum + i; 4793 phy_number = event_data->StartPhyNum + i;
4779 reason_code = event_data->PHY[i].PhyStatus & 4794 reason_code = event_data->PHY[i].PhyStatus &
@@ -6273,7 +6288,8 @@ _firmware_event_work(struct work_struct *work)
6273 struct MPT2SAS_ADAPTER *ioc = fw_event->ioc; 6288 struct MPT2SAS_ADAPTER *ioc = fw_event->ioc;
6274 6289
6275 /* the queue is being flushed so ignore this event */ 6290 /* the queue is being flushed so ignore this event */
6276 if (ioc->remove_host || fw_event->cancel_pending_work) { 6291 if (ioc->remove_host || fw_event->cancel_pending_work ||
6292 ioc->pci_error_recovery) {
6277 _scsih_fw_event_free(ioc, fw_event); 6293 _scsih_fw_event_free(ioc, fw_event);
6278 return; 6294 return;
6279 } 6295 }
@@ -6355,7 +6371,7 @@ mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index,
6355 u16 sz; 6371 u16 sz;
6356 6372
6357 /* events turned off due to host reset or driver unloading */ 6373 /* events turned off due to host reset or driver unloading */
6358 if (ioc->remove_host) 6374 if (ioc->remove_host || ioc->pci_error_recovery)
6359 return 1; 6375 return 1;
6360 6376
6361 mpi_reply = mpt2sas_base_get_reply_virt_addr(ioc, reply); 6377 mpi_reply = mpt2sas_base_get_reply_virt_addr(ioc, reply);
@@ -7058,12 +7074,17 @@ _scsih_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
7058 case pci_channel_io_normal: 7074 case pci_channel_io_normal:
7059 return PCI_ERS_RESULT_CAN_RECOVER; 7075 return PCI_ERS_RESULT_CAN_RECOVER;
7060 case pci_channel_io_frozen: 7076 case pci_channel_io_frozen:
7077 /* Fatal error, prepare for slot reset */
7078 ioc->pci_error_recovery = 1;
7061 scsi_block_requests(ioc->shost); 7079 scsi_block_requests(ioc->shost);
7062 mpt2sas_base_stop_watchdog(ioc); 7080 mpt2sas_base_stop_watchdog(ioc);
7063 mpt2sas_base_free_resources(ioc); 7081 mpt2sas_base_free_resources(ioc);
7064 return PCI_ERS_RESULT_NEED_RESET; 7082 return PCI_ERS_RESULT_NEED_RESET;
7065 case pci_channel_io_perm_failure: 7083 case pci_channel_io_perm_failure:
7066 _scsih_remove(pdev); 7084 /* Permanent error, prepare for device removal */
7085 ioc->pci_error_recovery = 1;
7086 mpt2sas_base_stop_watchdog(ioc);
7087 _scsih_flush_running_cmds(ioc);
7067 return PCI_ERS_RESULT_DISCONNECT; 7088 return PCI_ERS_RESULT_DISCONNECT;
7068 } 7089 }
7069 return PCI_ERS_RESULT_NEED_RESET; 7090 return PCI_ERS_RESULT_NEED_RESET;
@@ -7087,7 +7108,9 @@ _scsih_pci_slot_reset(struct pci_dev *pdev)
7087 printk(MPT2SAS_INFO_FMT "PCI error: slot reset callback!!\n", 7108 printk(MPT2SAS_INFO_FMT "PCI error: slot reset callback!!\n",
7088 ioc->name); 7109 ioc->name);
7089 7110
7111 ioc->pci_error_recovery = 0;
7090 ioc->pdev = pdev; 7112 ioc->pdev = pdev;
7113 pci_restore_state(pdev);
7091 rc = mpt2sas_base_map_resources(ioc); 7114 rc = mpt2sas_base_map_resources(ioc);
7092 if (rc) 7115 if (rc)
7093 return PCI_ERS_RESULT_DISCONNECT; 7116 return PCI_ERS_RESULT_DISCONNECT;
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index f29ea5e78bb3..b55c6dc07470 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -140,7 +140,7 @@ _transport_set_identify(struct MPT2SAS_ADAPTER *ioc, u16 handle,
140 u32 device_info; 140 u32 device_info;
141 u32 ioc_status; 141 u32 ioc_status;
142 142
143 if (ioc->shost_recovery) { 143 if (ioc->shost_recovery || ioc->pci_error_recovery) {
144 printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n", 144 printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n",
145 __func__, ioc->name); 145 __func__, ioc->name);
146 return -EFAULT; 146 return -EFAULT;
@@ -302,7 +302,7 @@ _transport_expander_report_manufacture(struct MPT2SAS_ADAPTER *ioc,
302 u64 *sas_address_le; 302 u64 *sas_address_le;
303 u16 wait_state_count; 303 u16 wait_state_count;
304 304
305 if (ioc->shost_recovery) { 305 if (ioc->shost_recovery || ioc->pci_error_recovery) {
306 printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n", 306 printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n",
307 __func__, ioc->name); 307 __func__, ioc->name);
308 return -EFAULT; 308 return -EFAULT;
@@ -894,7 +894,7 @@ mpt2sas_transport_update_links(struct MPT2SAS_ADAPTER *ioc,
894 struct _sas_node *sas_node; 894 struct _sas_node *sas_node;
895 struct _sas_phy *mpt2sas_phy; 895 struct _sas_phy *mpt2sas_phy;
896 896
897 if (ioc->shost_recovery) 897 if (ioc->shost_recovery || ioc->pci_error_recovery)
898 return; 898 return;
899 899
900 spin_lock_irqsave(&ioc->sas_node_lock, flags); 900 spin_lock_irqsave(&ioc->sas_node_lock, flags);
@@ -997,7 +997,7 @@ _transport_get_expander_phy_error_log(struct MPT2SAS_ADAPTER *ioc,
997 u64 *sas_address_le; 997 u64 *sas_address_le;
998 u16 wait_state_count; 998 u16 wait_state_count;
999 999
1000 if (ioc->shost_recovery) { 1000 if (ioc->shost_recovery || ioc->pci_error_recovery) {
1001 printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n", 1001 printk(MPT2SAS_INFO_FMT "%s: host reset in progress!\n",
1002 __func__, ioc->name); 1002 __func__, ioc->name);
1003 return -EFAULT; 1003 return -EFAULT;