aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/message
diff options
context:
space:
mode:
authorKashyap, Desai <kashyap.desai@lsi.com>2010-07-26 09:26:21 -0400
committerJames Bottomley <James.Bottomley@suse.de>2010-07-28 10:07:46 -0400
commitc9de7dc48307395fb71780b567ae8833b080d1c8 (patch)
treeb034f8833d64b640d9ef9e3f61191d06b51e5027 /drivers/message
parentb5833cbbd7c2bc3462e684feadd8e6a0ca8e5387 (diff)
[SCSI] mptfusion: Block Error handling for deleting devices or Device in DMD
Issue description: In a multipath topology, when device deletion is in a transient state, the multipath driver can call blk_flush_queue() as part of path failure handling. Before the device gets deleted from the OS, the device may go OFFLINE as part of the error handling kicked off by the multipathing driver. This condition is hit more frequently if the device missing delay timer (an LSI-specific firmware parameter) is set to a non-zero value. The root cause of this issue is that the error handling thread gets kicked off for a device which is not really present (it is in the transient state of being deleted). This patch provides a solution for this issue. The driver now uses the eh_timed_out callback; see below. mptsas_transport_template->eh_timed_out = mptsas_eh_timed_out Using the mptsas_eh_timed_out function, the driver can decide whether the vdevice is under the device missing delay or in the deleting state. In either of those cases, BLK_EH_RESET_TIMER is returned to the SCSI midlayer and the error handling thread will not be kicked off for that particular SCSI command. Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com> Cc: Stable Tree <stable@kernel.org> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/message')
-rw-r--r--drivers/message/fusion/mptbase.h2
-rw-r--r--drivers/message/fusion/mptsas.c125
-rw-r--r--drivers/message/fusion/mptscsih.c40
3 files changed, 157 insertions, 10 deletions
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index 0d149c82e764..7f31973b3f7c 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -396,6 +396,8 @@ typedef struct _VirtTarget {
396 u8 raidVolume; /* set, if RAID Volume */ 396 u8 raidVolume; /* set, if RAID Volume */
397 u8 type; /* byte 0 of Inquiry data */ 397 u8 type; /* byte 0 of Inquiry data */
398 u8 deleted; /* target in process of being removed */ 398 u8 deleted; /* target in process of being removed */
399 u8 inDMD; /* currently in the device
400 removal delay timer */
399 u32 num_luns; 401 u32 num_luns;
400} VirtTarget; 402} VirtTarget;
401 403
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index f705a235300e..235113ac08e5 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -57,6 +57,7 @@
57#include <scsi/scsi_device.h> 57#include <scsi/scsi_device.h>
58#include <scsi/scsi_host.h> 58#include <scsi/scsi_host.h>
59#include <scsi/scsi_transport_sas.h> 59#include <scsi/scsi_transport_sas.h>
60#include <scsi/scsi_transport.h>
60#include <scsi/scsi_dbg.h> 61#include <scsi/scsi_dbg.h>
61 62
62#include "mptbase.h" 63#include "mptbase.h"
@@ -1912,6 +1913,48 @@ mptsas_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
1912 return mptscsih_qcmd(SCpnt,done); 1913 return mptscsih_qcmd(SCpnt,done);
1913} 1914}
1914 1915
1916/**
1917 * mptsas_eh_timed_out - resets the scsi_cmnd timeout
1918 * if the target of the command is flagged as deleted or is
1919 * currently in the device removal delay (inDMD).
1920 * @sc: scsi command that the midlayer is about to time out
1921 * Returns BLK_EH_RESET_TIMER for such a target, else BLK_EH_NOT_HANDLED.
1922 **/
1923static enum blk_eh_timer_return mptsas_eh_timed_out(struct scsi_cmnd *sc)
1924{
1925 MPT_SCSI_HOST *hd;
1926 MPT_ADAPTER *ioc;
1927 VirtDevice *vdevice;
1928 enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
1929
1930 hd = shost_priv(sc->device->host);
1931 if (hd == NULL) {
1932 printk(KERN_ERR MYNAM ": %s: Can't locate host! (sc=%p)\n",
1933 __func__, sc);
1934 goto done;
1935 }
1936
1937 ioc = hd->ioc;
1938 if (ioc->bus_type != SAS) {
1939 printk(KERN_ERR MYNAM ": %s: Wrong bus type (sc=%p)\n",
1940 __func__, sc);
1941 goto done;
1942 }
1943
1944 vdevice = sc->device->hostdata;
1945 if (vdevice && vdevice->vtarget && (vdevice->vtarget->inDMD
1946 || vdevice->vtarget->deleted)) {
1947 dtmprintk(ioc, printk(MYIOC_s_WARN_FMT ": %s: target removed "
1948 "or in device removal delay (sc=%p)\n",
1949 ioc->name, __func__, sc));
1950 rc = BLK_EH_RESET_TIMER;
1951 goto done;
1952 }
1953
1954done:
1955 return rc;
1956}
1957
1915 1958
1916static struct scsi_host_template mptsas_driver_template = { 1959static struct scsi_host_template mptsas_driver_template = {
1917 .module = THIS_MODULE, 1960 .module = THIS_MODULE,
@@ -2984,6 +3027,7 @@ static int mptsas_probe_one_phy(struct device *dev,
2984 struct sas_phy *phy; 3027 struct sas_phy *phy;
2985 struct sas_port *port; 3028 struct sas_port *port;
2986 int error = 0; 3029 int error = 0;
3030 VirtTarget *vtarget;
2987 3031
2988 if (!dev) { 3032 if (!dev) {
2989 error = -ENODEV; 3033 error = -ENODEV;
@@ -3206,6 +3250,16 @@ static int mptsas_probe_one_phy(struct device *dev,
3206 rphy_to_expander_device(rphy)); 3250 rphy_to_expander_device(rphy));
3207 } 3251 }
3208 3252
3253 /* If the device exists,verify it wasn't previously flagged
3254 as a missing device. If so, clear it */
3255 vtarget = mptsas_find_vtarget(ioc,
3256 phy_info->attached.channel,
3257 phy_info->attached.id);
3258 if (vtarget && vtarget->inDMD) {
3259 printk(KERN_INFO "Device returned, unsetting inDMD\n");
3260 vtarget->inDMD = 0;
3261 }
3262
3209 out: 3263 out:
3210 return error; 3264 return error;
3211} 3265}
@@ -3659,9 +3713,42 @@ mptsas_send_link_status_event(struct fw_event_work *fw_event)
3659 MPI_SAS_IOUNIT0_RATE_FAILED_SPEED_NEGOTIATION) 3713 MPI_SAS_IOUNIT0_RATE_FAILED_SPEED_NEGOTIATION)
3660 phy_info->phy->negotiated_linkrate = 3714 phy_info->phy->negotiated_linkrate =
3661 SAS_LINK_RATE_FAILED; 3715 SAS_LINK_RATE_FAILED;
3662 else 3716 else {
3663 phy_info->phy->negotiated_linkrate = 3717 phy_info->phy->negotiated_linkrate =
3664 SAS_LINK_RATE_UNKNOWN; 3718 SAS_LINK_RATE_UNKNOWN;
3719 if (ioc->device_missing_delay &&
3720 mptsas_is_end_device(&phy_info->attached)) {
3721 struct scsi_device *sdev;
3722 VirtDevice *vdevice;
3723 u8 channel, id;
3724 id = phy_info->attached.id;
3725 channel = phy_info->attached.channel;
3726 devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
3727 "Link down for fw_id %d:fw_channel %d\n",
3728 ioc->name, phy_info->attached.id,
3729 phy_info->attached.channel));
3730
3731 shost_for_each_device(sdev, ioc->sh) {
3732 vdevice = sdev->hostdata;
3733 if ((vdevice == NULL) ||
3734 (vdevice->vtarget == NULL))
3735 continue;
3736 if ((vdevice->vtarget->tflags &
3737 MPT_TARGET_FLAGS_RAID_COMPONENT ||
3738 vdevice->vtarget->raidVolume))
3739 continue;
3740 if (vdevice->vtarget->id == id &&
3741 vdevice->vtarget->channel ==
3742 channel)
3743 devtprintk(ioc,
3744 printk(MYIOC_s_DEBUG_FMT
3745 "SDEV OUTSTANDING CMDS"
3746 "%d\n", ioc->name,
3747 sdev->device_busy));
3748 }
3749
3750 }
3751 }
3665 } 3752 }
3666 out: 3753 out:
3667 mptsas_free_fw_event(ioc, fw_event); 3754 mptsas_free_fw_event(ioc, fw_event);
@@ -4906,12 +4993,47 @@ mptsas_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *reply)
4906 { 4993 {
4907 EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data = 4994 EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *sas_event_data =
4908 (EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *)reply->Data; 4995 (EVENT_DATA_SAS_DEVICE_STATUS_CHANGE *)reply->Data;
4996 u16 ioc_stat;
4997 ioc_stat = le16_to_cpu(reply->IOCStatus);
4909 4998
4910 if (sas_event_data->ReasonCode == 4999 if (sas_event_data->ReasonCode ==
4911 MPI_EVENT_SAS_DEV_STAT_RC_NOT_RESPONDING) { 5000 MPI_EVENT_SAS_DEV_STAT_RC_NOT_RESPONDING) {
4912 mptsas_target_reset_queue(ioc, sas_event_data); 5001 mptsas_target_reset_queue(ioc, sas_event_data);
4913 return 0; 5002 return 0;
4914 } 5003 }
5004 if (sas_event_data->ReasonCode ==
5005 MPI_EVENT_SAS_DEV_STAT_RC_INTERNAL_DEVICE_RESET &&
5006 ioc->device_missing_delay &&
5007 (ioc_stat & MPI_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE)) {
5008 VirtTarget *vtarget = NULL;
5009 u8 id, channel;
5010 u32 log_info = le32_to_cpu(reply->IOCLogInfo);
5011
5012 id = sas_event_data->TargetID;
5013 channel = sas_event_data->Bus;
5014
5015 vtarget = mptsas_find_vtarget(ioc, channel, id);
5016 if (vtarget) {
5017 devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
5018 "LogInfo (0x%x) available for "
5019 "INTERNAL_DEVICE_RESET"
5020 "fw_id %d fw_channel %d\n", ioc->name,
5021 log_info, id, channel));
5022 if (vtarget->raidVolume) {
5023 devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
5024 "Skipping Raid Volume for inDMD\n",
5025 ioc->name));
5026 } else {
5027 devtprintk(ioc, printk(MYIOC_s_DEBUG_FMT
5028 "Setting device flag inDMD\n",
5029 ioc->name));
5030 vtarget->inDMD = 1;
5031 }
5032
5033 }
5034
5035 }
5036
4915 break; 5037 break;
4916 } 5038 }
4917 case MPI_EVENT_SAS_EXPANDER_STATUS_CHANGE: 5039 case MPI_EVENT_SAS_EXPANDER_STATUS_CHANGE:
@@ -5244,6 +5366,7 @@ mptsas_init(void)
5244 sas_attach_transport(&mptsas_transport_functions); 5366 sas_attach_transport(&mptsas_transport_functions);
5245 if (!mptsas_transport_template) 5367 if (!mptsas_transport_template)
5246 return -ENODEV; 5368 return -ENODEV;
5369 mptsas_transport_template->eh_timed_out = mptsas_eh_timed_out;
5247 5370
5248 mptsasDoneCtx = mpt_register(mptscsih_io_done, MPTSAS_DRIVER); 5371 mptsasDoneCtx = mpt_register(mptscsih_io_done, MPTSAS_DRIVER);
5249 mptsasTaskCtx = mpt_register(mptscsih_taskmgmt_complete, MPTSAS_DRIVER); 5372 mptsasTaskCtx = mpt_register(mptscsih_taskmgmt_complete, MPTSAS_DRIVER);
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index dceb67a21825..59b8f53d1ece 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -664,6 +664,7 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
664 u32 log_info; 664 u32 log_info;
665 665
666 status = le16_to_cpu(pScsiReply->IOCStatus) & MPI_IOCSTATUS_MASK; 666 status = le16_to_cpu(pScsiReply->IOCStatus) & MPI_IOCSTATUS_MASK;
667
667 scsi_state = pScsiReply->SCSIState; 668 scsi_state = pScsiReply->SCSIState;
668 scsi_status = pScsiReply->SCSIStatus; 669 scsi_status = pScsiReply->SCSIStatus;
669 xfer_cnt = le32_to_cpu(pScsiReply->TransferCount); 670 xfer_cnt = le32_to_cpu(pScsiReply->TransferCount);
@@ -738,15 +739,36 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
738 739
739 case MPI_IOCSTATUS_SCSI_IOC_TERMINATED: /* 0x004B */ 740 case MPI_IOCSTATUS_SCSI_IOC_TERMINATED: /* 0x004B */
740 if ( ioc->bus_type == SAS ) { 741 if ( ioc->bus_type == SAS ) {
741 u16 ioc_status = le16_to_cpu(pScsiReply->IOCStatus); 742 u16 ioc_status =
742 if (ioc_status & MPI_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) { 743 le16_to_cpu(pScsiReply->IOCStatus);
743 if ((log_info & SAS_LOGINFO_MASK) 744 if ((ioc_status &
744 == SAS_LOGINFO_NEXUS_LOSS) { 745 MPI_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE)
745 sc->result = 746 &&
746 (DID_TRANSPORT_DISRUPTED 747 ((log_info & SAS_LOGINFO_MASK) ==
747 << 16); 748 SAS_LOGINFO_NEXUS_LOSS)) {
748 break; 749 VirtDevice *vdevice =
749 } 750 sc->device->hostdata;
751
752 /* flag the device as being in
753 * device removal delay so we can
754 * notify the midlayer to hold off
755 * on timeout eh */
756 if (vdevice && vdevice->
757 vtarget &&
758 vdevice->vtarget->
759 raidVolume)
760 printk(KERN_INFO
761 "Skipping Raid Volume"
762 "for inDMD\n");
763 else if (vdevice &&
764 vdevice->vtarget)
765 vdevice->vtarget->
766 inDMD = 1;
767
768 sc->result =
769 (DID_TRANSPORT_DISRUPTED
770 << 16);
771 break;
750 } 772 }
751 } else if (ioc->bus_type == FC) { 773 } else if (ioc->bus_type == FC) {
752 /* 774 /*