aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorMike Christie <michaelc@cs.wisc.edu>2010-02-10 17:51:45 -0500
committerJames Bottomley <James.Bottomley@suse.de>2010-02-17 14:40:10 -0500
commit92ed4d69934a1281abcc10c6a82274a04651a260 (patch)
tree731351eb3bd1c642dc8ed8b27389173e44998a77 /drivers
parent9010b94636312c7fb12b591ef09e915f8f80bbd5 (diff)
[SCSI] libiscsi: reset cmd timer if cmds are making progress
This patch resets the cmd timer if cmds started before the timedout command are making progress. The idea is that the cmd probably timed out because we are trying to exeucte too many commands. If it turns out that the device the IO timedout on was bad or the cmd just got screwed up but other IO/devs were ok then we will will figure this out when the cmds ahead of the timed out one complete ok. This also fixes a bug where we were sort of detecting this by setting the last_timeout and last_xfer to the same value when the task was allocated. That caught the case where we never got to send any IO for it. However, if the problem had started right before we started the new task, then we were forced to wait an extra cmd timeout seconds to start the scsi eh. Signed-off-by: Mike Christie <michaelc@cs.wisc.edu> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/scsi/libiscsi.c53
1 files changed, 49 insertions, 4 deletions
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index c28a712fd4db..703eb6a88790 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1919,10 +1919,11 @@ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn)
1919static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) 1919static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
1920{ 1920{
1921 enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED; 1921 enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
1922 struct iscsi_task *task = NULL; 1922 struct iscsi_task *task = NULL, *running_task;
1923 struct iscsi_cls_session *cls_session; 1923 struct iscsi_cls_session *cls_session;
1924 struct iscsi_session *session; 1924 struct iscsi_session *session;
1925 struct iscsi_conn *conn; 1925 struct iscsi_conn *conn;
1926 int i;
1926 1927
1927 cls_session = starget_to_session(scsi_target(sc->device)); 1928 cls_session = starget_to_session(scsi_target(sc->device));
1928 session = cls_session->dd_data; 1929 session = cls_session->dd_data;
@@ -1947,8 +1948,15 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
1947 } 1948 }
1948 1949
1949 task = (struct iscsi_task *)sc->SCp.ptr; 1950 task = (struct iscsi_task *)sc->SCp.ptr;
1950 if (!task) 1951 if (!task) {
1952 /*
1953 * Raced with completion. Just reset timer, and let it
1954 * complete normally
1955 */
1956 rc = BLK_EH_RESET_TIMER;
1951 goto done; 1957 goto done;
1958 }
1959
1952 /* 1960 /*
1953 * If we have sent (at least queued to the network layer) a pdu or 1961 * If we have sent (at least queued to the network layer) a pdu or
1954 * recvd one for the task since the last timeout ask for 1962 * recvd one for the task since the last timeout ask for
@@ -1956,10 +1964,10 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
1956 * we can check if it is the task or connection when we send the 1964 * we can check if it is the task or connection when we send the
1957 * nop as a ping. 1965 * nop as a ping.
1958 */ 1966 */
1959 if (time_after_eq(task->last_xfer, task->last_timeout)) { 1967 if (time_after(task->last_xfer, task->last_timeout)) {
1960 ISCSI_DBG_EH(session, "Command making progress. Asking " 1968 ISCSI_DBG_EH(session, "Command making progress. Asking "
1961 "scsi-ml for more time to complete. " 1969 "scsi-ml for more time to complete. "
1962 "Last data recv at %lu. Last timeout was at " 1970 "Last data xfer at %lu. Last timeout was at "
1963 "%lu\n.", task->last_xfer, task->last_timeout); 1971 "%lu\n.", task->last_xfer, task->last_timeout);
1964 task->have_checked_conn = false; 1972 task->have_checked_conn = false;
1965 rc = BLK_EH_RESET_TIMER; 1973 rc = BLK_EH_RESET_TIMER;
@@ -1977,6 +1985,43 @@ static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
1977 goto done; 1985 goto done;
1978 } 1986 }
1979 1987
1988 for (i = 0; i < conn->session->cmds_max; i++) {
1989 running_task = conn->session->cmds[i];
1990 if (!running_task->sc || running_task == task ||
1991 running_task->state != ISCSI_TASK_RUNNING)
1992 continue;
1993
1994 /*
1995 * Only check if cmds started before this one have made
1996 * progress, or this could never fail
1997 */
1998 if (time_after(running_task->sc->jiffies_at_alloc,
1999 task->sc->jiffies_at_alloc))
2000 continue;
2001
2002 if (time_after(running_task->last_xfer, task->last_timeout)) {
2003 /*
2004 * This task has not made progress, but a task
2005 * started before us has transferred data since
2006 * we started/last-checked. We could be queueing
2007 * too many tasks or the LU is bad.
2008 *
2009 * If the device is bad the cmds ahead of us on
2010 * other devs will complete, and this loop will
2011 * eventually fail starting the scsi eh.
2012 */
2013 ISCSI_DBG_EH(session, "Command has not made progress "
2014 "but commands ahead of it have. "
2015 "Asking scsi-ml for more time to "
2016 "complete. Our last xfer vs running task "
2017 "last xfer %lu/%lu. Last check %lu.\n",
2018 task->last_xfer, running_task->last_xfer,
2019 task->last_timeout);
2020 rc = BLK_EH_RESET_TIMER;
2021 goto done;
2022 }
2023 }
2024
1980 /* Assumes nop timeout is shorter than scsi cmd timeout */ 2025 /* Assumes nop timeout is shorter than scsi cmd timeout */
1981 if (task->have_checked_conn) 2026 if (task->have_checked_conn)
1982 goto done; 2027 goto done;