diff options
author | Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com> | 2012-01-16 16:30:25 -0500 |
---|---|---|
committer | James Bottomley <JBottomley@Parallels.com> | 2012-02-18 09:33:13 -0500 |
commit | a92fa25c63a788758bd52e9123504d133210c8b7 (patch) | |
tree | bec201c46aa5ae21f2006bb87ad744741b35e241 /drivers/scsi/ipr.c | |
parent | 7fbd764881a5f9dc81a378293b7a74227fcc04ed (diff) |
[SCSI] ipr: fix eeh recovery for 64-bit adapters
In some scenarios, an EEH error can take a long time to be detected, since the
driver issues an MMIO read only after a device reset command times out and we
try to reset the adapter. This patch adds some code in ipr_cancel_op() to read
a hardware register so we detect the error earlier in case the op is being
aborted because of a timeout caused by a frozen adapter slot.
Another problem in such scenarios is that in __ipr_eh_host_reset() we change the
dump state flag from WAIT_FOR_DUMP to GET_DUMP, and the flag is later changed
from GET_DUMP to READ_DUMP in ipr_reset_restore_cfg_space(). However, if
ipr_reset_restore_cfg_space() has already been called by the PCI EEH code by
the time the SCSI error handling calls __ipr_eh_host_reset(), we end up with
the flag in an inconsistent state. This patch also prevents this problem by
only changing the dump state in __ipr_eh_host_reset() when the adapter is not
already in reset/reload.
Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
Acked-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Diffstat (limited to 'drivers/scsi/ipr.c')
-rw-r--r-- | drivers/scsi/ipr.c | 24 |
1 files changed, 18 insertions, 6 deletions
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 67b169b7a5be..b538f0883fd2 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c | |||
@@ -4613,11 +4613,13 @@ static int __ipr_eh_host_reset(struct scsi_cmnd * scsi_cmd) | |||
4613 | ENTER; | 4613 | ENTER; |
4614 | ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; | 4614 | ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; |
4615 | 4615 | ||
4616 | dev_err(&ioa_cfg->pdev->dev, | 4616 | if (!ioa_cfg->in_reset_reload) { |
4617 | "Adapter being reset as a result of error recovery.\n"); | 4617 | dev_err(&ioa_cfg->pdev->dev, |
4618 | "Adapter being reset as a result of error recovery.\n"); | ||
4618 | 4619 | ||
4619 | if (WAIT_FOR_DUMP == ioa_cfg->sdt_state) | 4620 | if (WAIT_FOR_DUMP == ioa_cfg->sdt_state) |
4620 | ioa_cfg->sdt_state = GET_DUMP; | 4621 | ioa_cfg->sdt_state = GET_DUMP; |
4622 | } | ||
4621 | 4623 | ||
4622 | rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV); | 4624 | rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV); |
4623 | 4625 | ||
@@ -4907,7 +4909,7 @@ static int ipr_cancel_op(struct scsi_cmnd * scsi_cmd) | |||
4907 | struct ipr_ioa_cfg *ioa_cfg; | 4909 | struct ipr_ioa_cfg *ioa_cfg; |
4908 | struct ipr_resource_entry *res; | 4910 | struct ipr_resource_entry *res; |
4909 | struct ipr_cmd_pkt *cmd_pkt; | 4911 | struct ipr_cmd_pkt *cmd_pkt; |
4910 | u32 ioasc; | 4912 | u32 ioasc, int_reg; |
4911 | int op_found = 0; | 4913 | int op_found = 0; |
4912 | 4914 | ||
4913 | ENTER; | 4915 | ENTER; |
@@ -4920,7 +4922,17 @@ static int ipr_cancel_op(struct scsi_cmnd * scsi_cmd) | |||
4920 | */ | 4922 | */ |
4921 | if (ioa_cfg->in_reset_reload || ioa_cfg->ioa_is_dead) | 4923 | if (ioa_cfg->in_reset_reload || ioa_cfg->ioa_is_dead) |
4922 | return FAILED; | 4924 | return FAILED; |
4923 | if (!res || !ipr_is_gscsi(res)) | 4925 | if (!res) |
4926 | return FAILED; | ||
4927 | |||
4928 | /* | ||
4929 | * If we are aborting a timed out op, chances are that the timeout was caused | ||
4930 | * by a still not detected EEH error. In such cases, reading a register will | ||
4931 | * trigger the EEH recovery infrastructure. | ||
4932 | */ | ||
4933 | int_reg = readl(ioa_cfg->regs.sense_interrupt_reg); | ||
4934 | |||
4935 | if (!ipr_is_gscsi(res)) | ||
4924 | return FAILED; | 4936 | return FAILED; |
4925 | 4937 | ||
4926 | list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { | 4938 | list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { |