aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJames Bottomley <JBottomley@Parallels.com>2012-07-25 15:55:55 -0400
committerJames Bottomley <JBottomley@Parallels.com>2012-08-22 01:42:54 -0400
commit14216561e164671ce147458653b1fea06a4ada1e (patch)
treedbe91171fa7469c868268a6506c05a2a6c0982e6
parent27c419739b67decced4650440829b8d51bef954b (diff)
[SCSI] Fix 'Device not ready' issue on mpt2sas
This is a particularly nasty SCSI ATA Translation Layer (SATL) problem. SAT-2 says (section 8.12.2) if the device is in the stopped state as the result of processing a START STOP UNIT command (see 9.11), then the SATL shall terminate the TEST UNIT READY command with CHECK CONDITION status with the sense key set to NOT READY and the additional sense code of LOGICAL UNIT NOT READY, INITIALIZING COMMAND REQUIRED; mpt2sas internal SATL seems to implement this. The result is very confusing standby behaviour (using hdparm -y). If you suspend a drive and then send another command, usually it wakes up. However, if the next command is a TEST UNIT READY, the SATL sees that the drive is suspended and proceeds to follow the SATL rules for this, returning NOT READY to all subsequent commands. This means that the ordering of TEST UNIT READY is crucial: if you send TUR and then a command, you get a NOT READY to both back. If you send a command and then a TUR, you get GOOD status because the preceeding command woke the drive. This bit us badly because commit 85ef06d1d252f6a2e73b678591ab71caad4667bb Author: Tejun Heo <tj@kernel.org> Date: Fri Jul 1 16:17:47 2011 +0200 block: flush MEDIA_CHANGE from drivers on close(2) Changed our ordering on TEST UNIT READY commands meaning that SATA drives connected to an mpt2sas now suspend and refuse to wake (because the mpt2sas SATL sees the suspend *before* the drives get awoken by the next ATA command) resulting in lots of failed commands. The standard is completely nuts forcing this inconsistent behaviour, but we have to work around it. The fix for this is twofold: 1. Set the allow_restart flag so we wake the drive when we see it has been suspended 2. Return all TEST UNIT READY status directly to the mid layer without any further error handling which prevents us causing error handling which may offline the device just because of a media check TUR. Reported-by: Matthias Prager <linux@matthiasprager.de> Cc: stable@vger.kernel.org Signed-off-by: James Bottomley <JBottomley@Parallels.com>
-rw-r--r--drivers/scsi/scsi_error.c10
-rw-r--r--drivers/scsi/scsi_scan.c10
2 files changed, 20 insertions, 0 deletions
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 4a6381c87253..de2337f255a7 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -42,6 +42,8 @@
42 42
43#include <trace/events/scsi.h> 43#include <trace/events/scsi.h>
44 44
45static void scsi_eh_done(struct scsi_cmnd *scmd);
46
45#define SENSE_TIMEOUT (10*HZ) 47#define SENSE_TIMEOUT (10*HZ)
46 48
47/* 49/*
@@ -241,6 +243,14 @@ static int scsi_check_sense(struct scsi_cmnd *scmd)
241 if (! scsi_command_normalize_sense(scmd, &sshdr)) 243 if (! scsi_command_normalize_sense(scmd, &sshdr))
242 return FAILED; /* no valid sense data */ 244 return FAILED; /* no valid sense data */
243 245
246 if (scmd->cmnd[0] == TEST_UNIT_READY && scmd->scsi_done != scsi_eh_done)
247 /*
248 * nasty: for mid-layer issued TURs, we need to return the
249 * actual sense data without any recovery attempt. For eh
250 * issued ones, we need to try to recover and interpret
251 */
252 return SUCCESS;
253
244 if (scsi_sense_is_deferred(&sshdr)) 254 if (scsi_sense_is_deferred(&sshdr))
245 return NEEDS_RETRY; 255 return NEEDS_RETRY;
246 256
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 56a93794c470..d947ffc20ceb 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -764,6 +764,16 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
764 sdev->model = (char *) (sdev->inquiry + 16); 764 sdev->model = (char *) (sdev->inquiry + 16);
765 sdev->rev = (char *) (sdev->inquiry + 32); 765 sdev->rev = (char *) (sdev->inquiry + 32);
766 766
767 if (strncmp(sdev->vendor, "ATA ", 8) == 0) {
768 /*
769 * sata emulation layer device. This is a hack to work around
770 * the SATL power management specifications which state that
771 * when the SATL detects the device has gone into standby
772 * mode, it shall respond with NOT READY.
773 */
774 sdev->allow_restart = 1;
775 }
776
767 if (*bflags & BLIST_ISROM) { 777 if (*bflags & BLIST_ISROM) {
768 sdev->type = TYPE_ROM; 778 sdev->type = TYPE_ROM;
769 sdev->removable = 1; 779 sdev->removable = 1;