libata: retry failed FLUSH if device didn't fail it

If an ATA device fails FLUSH, it means that the device failed to write out some amount of data and the error needs to be reported to upper layers. As retries can't recover the lost data, FLUSH failures need to be reported immediately in general.

However, if FLUSH fails due to transmission errors, the FLUSH needs to be retried; otherwise, filesystems may switch to RO mode and/or a RAID array may drop a drive because of a random transmission glitch.

This condition can be rather easily reproduced with the combination of ext4 and certain ahci controllers which go through a PHY event after a powersave mode switch. A powersave mode switch is often closely followed by a flush from the filesystem; the FLUSH then fails with an ATA bus error, which makes the filesystem code believe that data is lost and drop to RO mode.

This was reported in the following bugzilla bug.

http://bugzilla.kernel.org/show_bug.cgi?id=14543

This patch makes libata EH retry FLUSH if it wasn't failed by the device.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Andrey Vihrov <andrey.vihrov@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
author: Tejun Heo <tj@kernel.org> 2009-11-19 01:36:45 -0500
committer: Jeff Garzik <jgarzik@redhat.com> 2009-12-03 02:46:35 -0500
commit: 6013efd8860bf15c1f86f365332642cfe557152f (patch)
tree: 9d7fe6d92ed9e067cda3b2e62d69f50175450e97
parent: fd6c29e3dec9e44ecbcba3c57efa08af70a10f1e (diff)
2 files changed, 95 insertions, 1 deletions
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index bba2ae5df1c2..0ea97c942ced 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -110,6 +110,13 @@ static const unsigned long ata_eh_identify_timeouts[] = {
        ULONG_MAX,
 };
+static const unsigned long ata_eh_flush_timeouts[] = {
+        15000,  /* be generous with flush (NOTE(review): presumably milliseconds -- confirm) */
+        15000,  /* ditto */
+        30000,  /* and even more generous */
+        ULONG_MAX,  /* same final entry as ata_eh_identify_timeouts; presumably the end marker -- confirm */
+};
 static const unsigned long ata_eh_other_timeouts[] = {
         5000,  /* same rationale as identify timeout */
        10000,  /* ditto */
@@ -147,6 +154,8 @@ ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
          .timeouts = ata_eh_other_timeouts, },
        { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
          .timeouts = ata_eh_other_timeouts, },
+        { .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
+          .timeouts = ata_eh_flush_timeouts },
 };
 #undef CMDS
@@ -3112,6 +3121,82 @@ static int atapi_eh_clear_ua(struct ata_device *dev)
        return 0;
 }
+/**
+ *      ata_eh_maybe_retry_flush - Retry FLUSH if necessary
+ *      @dev: ATA device which may need FLUSH retry
+ *
+ *      If @dev failed FLUSH, it needs to be reported to the upper
+ *      layer immediately as it means that @dev failed to remap and
+ *      already lost at least a sector, and further FLUSH retries won't
+ *      make any difference to the lost sector.  However, if FLUSH
+ *      failed for other reasons, for example a transmission error,
+ *      FLUSH needs to be retried.
+ *
+ *      This function determines whether a FLUSH failure retry is
+ *      necessary and performs it if so.
+ *
+ *      RETURNS:
+ *      0 if EH can continue, -errno if EH needs to be repeated.
+ *      As written, the only error returned is -EIO: when the retried
+ *      FLUSH fails without AC_ERR_DEV set, or when it fails with
+ *      AC_ERR_DEV set while the port is frozen.
+ */
+static int ata_eh_maybe_retry_flush(struct ata_device *dev)
+{
+        struct ata_link *link = dev->link;
+        struct ata_port *ap = link->ap;
+        struct ata_queued_cmd *qc;
+        struct ata_taskfile tf;
+        unsigned int err_mask;
+        int rc = 0;
+        /*
+         * Did flush fail for this device?  Only act when the link's
+         * active tag is valid and the command behind that tag is a
+         * FLUSH / FLUSH EXT that was issued to @dev.
+         */
+        if (!ata_tag_valid(link->active_tag))
+                return 0;
+        qc = __ata_qc_from_tag(ap, link->active_tag);
+        if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT &&
+                               qc->tf.command != ATA_CMD_FLUSH))
+                return 0;
+        /*
+         * If the device failed it (AC_ERR_DEV is set), it should be
+         * reported to upper layers; do not retry.
+         */
+        if (qc->err_mask & AC_ERR_DEV)
+                return 0;
+        /*
+         * Flush failed for some other reason, give it another shot:
+         * build a fresh taskfile carrying the same command opcode
+         * with the no-data protocol and execute it.
+         */
+        ata_tf_init(dev, &tf);
+        tf.command = qc->tf.command;
+        tf.flags |= ATA_TFLAG_DEVICE;
+        tf.protocol = ATA_PROT_NODATA;
+        ata_dev_printk(dev, KERN_WARNING, "retrying FLUSH 0x%x Emask 0x%x\n",
+                       tf.command, qc->err_mask);
+        err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
+        if (!err_mask) {
+                /*
+                 * FLUSH is complete but there's no way to
+                 * successfully complete a failed command from EH.
+                 * Making sure retry is allowed at least once (hence
+                 * raising scsicmd->allowed to a minimum of 1) and
+                 * retrying it should do the trick - whatever was in
+                 * the cache is already on the platter and this won't
+                 * cause an infinite loop.
+                 */
+                qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1);
+        } else {
+                ata_dev_printk(dev, KERN_WARNING, "FLUSH failed Emask 0x%x\n",
+                               err_mask);
+                rc = -EIO;
+                /*
+                 * If the device failed the retry, report it to upper
+                 * layers: mark the original qc with AC_ERR_DEV and
+                 * copy the retry's taskfile into its result_tf
+                 * (NOTE(review): presumably ata_exec_internal() has
+                 * written the result taskfile back into tf - confirm).
+                 * EH may then continue (rc = 0) unless the port is
+                 * frozen, in which case -EIO is kept.
+                 */
+                if (err_mask & AC_ERR_DEV) {
+                        qc->err_mask |= AC_ERR_DEV;
+                        qc->result_tf = tf;
+                        if (!(ap->pflags & ATA_PFLAG_FROZEN))
+                                rc = 0;
+                }
+        }
+        return rc;
+}
 static int ata_link_nr_enabled(struct ata_link *link)
 {
        struct ata_device *dev;
@@ -3455,6 +3540,15 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
                        }
                }
+                /* retry flush if necessary */
+                ata_for_each_dev(dev, link, ALL) {
+                        if (dev->class != ATA_DEV_ATA)
+                                continue;
+                        rc = ata_eh_maybe_retry_flush(dev);
+                        if (rc)
+                                goto dev_fail;
+                }
                /* configure link power saving */
                if (ehc->i.action & ATA_EH_LPM)
                        ata_for_each_dev(dev, link, ALL)
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 87698640c091..ba07e84c9840 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -365,7 +365,7 @@ enum {
        /* This should match the actual table size of
         * ata_eh_cmd_timeout_table in libata-eh.c.
         */
-        ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 5,
+        ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6,
        /* Horkage types. May be set by libata or controller on drives
           (some horkage may be drive/controller pair dependant */
author	Tejun Heo <tj@kernel.org>	2009-11-19 01:36:45 -0500
committer	Jeff Garzik <jgarzik@redhat.com>	2009-12-03 02:46:35 -0500
commit	6013efd8860bf15c1f86f365332642cfe557152f (patch)
tree	9d7fe6d92ed9e067cda3b2e62d69f50175450e97
parent	fd6c29e3dec9e44ecbcba3c57efa08af70a10f1e (diff)

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index bba2ae5df1c2..0ea97c942ced 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c
@@ -110,6 +110,13 @@ static const unsigned long ata_eh_identify_timeouts[] = {
110	ULONG_MAX,	110	ULONG_MAX,
111	};	111	};
112		112
		113	static const unsigned long ata_eh_flush_timeouts[] = {
		114	15000, /* be generous with flush */
		115	15000, /* ditto */
		116	30000, /* and even more generous */
		117	ULONG_MAX,
		118	};
		119
113	static const unsigned long ata_eh_other_timeouts[] = {	120	static const unsigned long ata_eh_other_timeouts[] = {
114	5000, /* same rationale as identify timeout */	121	5000, /* same rationale as identify timeout */
115	10000, /* ditto */	122	10000, /* ditto */
@@ -147,6 +154,8 @@ ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
147	.timeouts = ata_eh_other_timeouts, },	154	.timeouts = ata_eh_other_timeouts, },
148	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),	155	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
149	.timeouts = ata_eh_other_timeouts, },	156	.timeouts = ata_eh_other_timeouts, },
		157	{ .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
		158	.timeouts = ata_eh_flush_timeouts },
150	};	159	};
151	#undef CMDS	160	#undef CMDS
152		161
@@ -3112,6 +3121,82 @@ static int atapi_eh_clear_ua(struct ata_device *dev)
3112	return 0;	3121	return 0;
3113	}	3122	}
3114		3123
		3124	/**
		3125	* ata_eh_maybe_retry_flush - Retry FLUSH if necessary
		3126	* @dev: ATA device which may need FLUSH retry
		3127	*
		3128	* If @dev failed FLUSH, it needs to be reported upper layer
		3129	* immediately as it means that @dev failed to remap and already
		3130	* lost at least a sector and further FLUSH retrials won't make
		3131	* any difference to the lost sector. However, if FLUSH failed
		3132	* for other reasons, for example transmission error, FLUSH needs
		3133	* to be retried.
		3134	*
		3135	* This function determines whether FLUSH failure retry is
		3136	* necessary and performs it if so.
		3137	*
		3138	* RETURNS:
		3139	* 0 if EH can continue, -errno if EH needs to be repeated.
		3140	*/
		3141	static int ata_eh_maybe_retry_flush(struct ata_device *dev)
		3142	{
		3143	struct ata_link *link = dev->link;
		3144	struct ata_port *ap = link->ap;
		3145	struct ata_queued_cmd *qc;
		3146	struct ata_taskfile tf;
		3147	unsigned int err_mask;
		3148	int rc = 0;
		3149
		3150	/* did flush fail for this device? */
		3151	if (!ata_tag_valid(link->active_tag))
		3152	return 0;
		3153
		3154	qc = __ata_qc_from_tag(ap, link->active_tag);
		3155	if (qc->dev != dev \|\| (qc->tf.command != ATA_CMD_FLUSH_EXT &&
		3156	qc->tf.command != ATA_CMD_FLUSH))
		3157	return 0;
		3158
		3159	/* if the device failed it, it should be reported to upper layers */
		3160	if (qc->err_mask & AC_ERR_DEV)
		3161	return 0;
		3162
		3163	/* flush failed for some other reason, give it another shot */
		3164	ata_tf_init(dev, &tf);
		3165
		3166	tf.command = qc->tf.command;
		3167	tf.flags \|= ATA_TFLAG_DEVICE;
		3168	tf.protocol = ATA_PROT_NODATA;
		3169
		3170	ata_dev_printk(dev, KERN_WARNING, "retrying FLUSH 0x%x Emask 0x%x\n",
		3171	tf.command, qc->err_mask);
		3172
		3173	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
		3174	if (!err_mask) {
		3175	/*
		3176	* FLUSH is complete but there's no way to
		3177	* successfully complete a failed command from EH.
		3178	* Making sure retry is allowed at least once and
		3179	* retrying it should do the trick - whatever was in
		3180	* the cache is already on the platter and this won't
		3181	* cause infinite loop.
		3182	*/
		3183	qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1);
		3184	} else {
		3185	ata_dev_printk(dev, KERN_WARNING, "FLUSH failed Emask 0x%x\n",
		3186	err_mask);
		3187	rc = -EIO;
		3188
		3189	/* if device failed it, report it to upper layers */
		3190	if (err_mask & AC_ERR_DEV) {
		3191	qc->err_mask \|= AC_ERR_DEV;
		3192	qc->result_tf = tf;
		3193	if (!(ap->pflags & ATA_PFLAG_FROZEN))
		3194	rc = 0;
		3195	}
		3196	}
		3197	return rc;
		3198	}
		3199
3115	static int ata_link_nr_enabled(struct ata_link *link)	3200	static int ata_link_nr_enabled(struct ata_link *link)
3116	{	3201	{
3117	struct ata_device *dev;	3202	struct ata_device *dev;
@@ -3455,6 +3540,15 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
3455	}	3540	}
3456	}	3541	}
3457		3542
		3543	/* retry flush if necessary */
		3544	ata_for_each_dev(dev, link, ALL) {
		3545	if (dev->class != ATA_DEV_ATA)
		3546	continue;
		3547	rc = ata_eh_maybe_retry_flush(dev);
		3548	if (rc)
		3549	goto dev_fail;
		3550	}
		3551
3458	/* configure link power saving */	3552	/* configure link power saving */
3459	if (ehc->i.action & ATA_EH_LPM)	3553	if (ehc->i.action & ATA_EH_LPM)
3460	ata_for_each_dev(dev, link, ALL)	3554	ata_for_each_dev(dev, link, ALL)


diff --git a/include/linux/libata.h b/include/linux/libata.h index 87698640c091..ba07e84c9840 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h
@@ -365,7 +365,7 @@ enum {
365	/* This should match the actual table size of	365	/* This should match the actual table size of
366	* ata_eh_cmd_timeout_table in libata-eh.c.	366	* ata_eh_cmd_timeout_table in libata-eh.c.
367	*/	367	*/
368	ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 5,	368	ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6,
369		369
370	/* Horkage types. May be set by libata or controller on drives	370	/* Horkage types. May be set by libata or controller on drives
371	(some horkage may be drive/controller pair dependant */	371	(some horkage may be drive/controller pair dependant */