aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <htejun@gmail.com>2008-05-19 13:17:54 -0400
committerJeff Garzik <jgarzik@redhat.com>2008-07-14 15:59:32 -0400
commit87fbc5a060faf2394bee88a93519f9b9d434727c (patch)
tree6f90cacb1471e64051473c4e1e664b2dea8603e0
parentd8af0eb6046c56e7238171ca420622541db24926 (diff)
libata: improve EH internal command timeout handling
ATA_TMOUT_INTERNAL, which was 30 seconds, was used for all internal commands, which is way too long when something goes wrong. This patch implements command-type-based stepped timeouts. Different command types can use different timeouts, and each command type can step through different timeout values after successive timeouts, i.e. the initial timeout is set to a value which should cover most cases but is not so long that runaway cases delay things too much. After the first try times out, the second try can use a longer timeout, and if that one times out too, it can go for the full 30sec timeout. IDENTIFYs use 5s - 10s - 30s timeouts and all other commands use 5s - 10s timeouts. This patch significantly cuts down the time needed to handle failure cases while still allowing libata to work with nut job devices through retries. Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
-rw-r--r--drivers/ata/libata-core.c16
-rw-r--r--drivers/ata/libata-eh.c121
-rw-r--r--drivers/ata/libata.h2
-rw-r--r--include/linux/libata.h8
4 files changed, 142 insertions, 5 deletions
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index c5c3b1b516e1..9bef1a84fe3f 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -144,7 +144,7 @@ static int libata_dma_mask = ATA_DMA_MASK_ATA|ATA_DMA_MASK_ATAPI|ATA_DMA_MASK_CF
144module_param_named(dma, libata_dma_mask, int, 0444); 144module_param_named(dma, libata_dma_mask, int, 0444);
145MODULE_PARM_DESC(dma, "DMA enable/disable (0x1==ATA, 0x2==ATAPI, 0x4==CF)"); 145MODULE_PARM_DESC(dma, "DMA enable/disable (0x1==ATA, 0x2==ATAPI, 0x4==CF)");
146 146
147static int ata_probe_timeout = ATA_TMOUT_INTERNAL / 1000; 147static int ata_probe_timeout;
148module_param(ata_probe_timeout, int, 0444); 148module_param(ata_probe_timeout, int, 0444);
149MODULE_PARM_DESC(ata_probe_timeout, "Set ATA probing timeout (seconds)"); 149MODULE_PARM_DESC(ata_probe_timeout, "Set ATA probing timeout (seconds)");
150 150
@@ -1611,6 +1611,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
1611 struct ata_link *link = dev->link; 1611 struct ata_link *link = dev->link;
1612 struct ata_port *ap = link->ap; 1612 struct ata_port *ap = link->ap;
1613 u8 command = tf->command; 1613 u8 command = tf->command;
1614 int auto_timeout = 0;
1614 struct ata_queued_cmd *qc; 1615 struct ata_queued_cmd *qc;
1615 unsigned int tag, preempted_tag; 1616 unsigned int tag, preempted_tag;
1616 u32 preempted_sactive, preempted_qc_active; 1617 u32 preempted_sactive, preempted_qc_active;
@@ -1683,8 +1684,14 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
1683 1684
1684 spin_unlock_irqrestore(ap->lock, flags); 1685 spin_unlock_irqrestore(ap->lock, flags);
1685 1686
1686 if (!timeout) 1687 if (!timeout) {
1687 timeout = ata_probe_timeout * 1000; 1688 if (ata_probe_timeout)
1689 timeout = ata_probe_timeout * 1000;
1690 else {
1691 timeout = ata_internal_cmd_timeout(dev, command);
1692 auto_timeout = 1;
1693 }
1694 }
1688 1695
1689 rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout)); 1696 rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout));
1690 1697
@@ -1760,6 +1767,9 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
1760 1767
1761 spin_unlock_irqrestore(ap->lock, flags); 1768 spin_unlock_irqrestore(ap->lock, flags);
1762 1769
1770 if ((err_mask & AC_ERR_TIMEOUT) && auto_timeout)
1771 ata_internal_cmd_timed_out(dev, command);
1772
1763 return err_mask; 1773 return err_mask;
1764} 1774}
1765 1775
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 83d1451fa714..d5f03a6e3334 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -67,6 +67,8 @@ enum {
67 ATA_ECAT_DUBIOUS_UNK_DEV = 7, 67 ATA_ECAT_DUBIOUS_UNK_DEV = 7,
68 ATA_ECAT_NR = 8, 68 ATA_ECAT_NR = 8,
69 69
70 ATA_EH_CMD_DFL_TIMEOUT = 5000,
71
70 /* always put at least this amount of time between resets */ 72 /* always put at least this amount of time between resets */
71 ATA_EH_RESET_COOL_DOWN = 5000, 73 ATA_EH_RESET_COOL_DOWN = 5000,
72 74
@@ -93,6 +95,53 @@ static const unsigned long ata_eh_reset_timeouts[] = {
93 ULONG_MAX, /* > 1 min has elapsed, give up */ 95 ULONG_MAX, /* > 1 min has elapsed, give up */
94}; 96};
95 97
98static const unsigned long ata_eh_identify_timeouts[] = {
99 5000, /* covers > 99% of successes and not too boring on failures */
100 10000, /* combined time till here is enough even for media access */
101 30000, /* for true idiots */
102 ULONG_MAX,
103};
104
105static const unsigned long ata_eh_other_timeouts[] = {
106 5000, /* same rationale as identify timeout */
107 10000, /* ditto */
108 /* but no merciful 30sec for other commands, it just isn't worth it */
109 ULONG_MAX,
110};
111
112struct ata_eh_cmd_timeout_ent {
113 const u8 *commands;
114 const unsigned long *timeouts;
115};
116
117/* The following table determines timeouts to use for EH internal
118 * commands. Each table entry is a command class and matches the
119 * commands the entry applies to and the timeout table to use.
120 *
121 * On the retry after a command timed out, the next timeout value from
122 * the table is used. If the table doesn't contain further entries,
123 * the last value is used.
124 *
125 * ehc->cmd_timeout_idx keeps track of which timeout to use per
126 * command class, so if SET_FEATURES times out on the first try, the
127 * next try will use the second timeout value only for that class.
128 */
129#define CMDS(cmds...) (const u8 []){ cmds, 0 }
130static const struct ata_eh_cmd_timeout_ent
131ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
132 { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
133 .timeouts = ata_eh_identify_timeouts, },
134 { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
135 .timeouts = ata_eh_other_timeouts, },
136 { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
137 .timeouts = ata_eh_other_timeouts, },
138 { .commands = CMDS(ATA_CMD_SET_FEATURES),
139 .timeouts = ata_eh_other_timeouts, },
140 { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
141 .timeouts = ata_eh_other_timeouts, },
142};
143#undef CMDS
144
96static void __ata_port_freeze(struct ata_port *ap); 145static void __ata_port_freeze(struct ata_port *ap);
97#ifdef CONFIG_PM 146#ifdef CONFIG_PM
98static void ata_eh_handle_port_suspend(struct ata_port *ap); 147static void ata_eh_handle_port_suspend(struct ata_port *ap);
@@ -238,6 +287,73 @@ void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
238 287
239#endif /* CONFIG_PCI */ 288#endif /* CONFIG_PCI */
240 289
290static int ata_lookup_timeout_table(u8 cmd)
291{
292 int i;
293
294 for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
295 const u8 *cur;
296
297 for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
298 if (*cur == cmd)
299 return i;
300 }
301
302 return -1;
303}
304
305/**
306 * ata_internal_cmd_timeout - determine timeout for an internal command
307 * @dev: target device
308 * @cmd: internal command to be issued
309 *
310 * Determine timeout for internal command @cmd for @dev.
311 *
312 * LOCKING:
313 * EH context.
314 *
315 * RETURNS:
316 * Determined timeout.
317 */
318unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
319{
320 struct ata_eh_context *ehc = &dev->link->eh_context;
321 int ent = ata_lookup_timeout_table(cmd);
322 int idx;
323
324 if (ent < 0)
325 return ATA_EH_CMD_DFL_TIMEOUT;
326
327 idx = ehc->cmd_timeout_idx[dev->devno][ent];
328 return ata_eh_cmd_timeout_table[ent].timeouts[idx];
329}
330
331/**
332 * ata_internal_cmd_timed_out - notification for internal command timeout
333 * @dev: target device
334 * @cmd: internal command which timed out
335 *
336 * Notify EH that internal command @cmd for @dev timed out. This
337 * function should be called only for commands whose timeouts are
338 * determined using ata_internal_cmd_timeout().
339 *
340 * LOCKING:
341 * EH context.
342 */
343void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
344{
345 struct ata_eh_context *ehc = &dev->link->eh_context;
346 int ent = ata_lookup_timeout_table(cmd);
347 int idx;
348
349 if (ent < 0)
350 return;
351
352 idx = ehc->cmd_timeout_idx[dev->devno][ent];
353 if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
354 ehc->cmd_timeout_idx[dev->devno][ent]++;
355}
356
241static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, 357static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
242 unsigned int err_mask) 358 unsigned int err_mask)
243{ 359{
@@ -2600,8 +2716,11 @@ static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
2600 ata_eh_detach_dev(dev); 2716 ata_eh_detach_dev(dev);
2601 2717
2602 /* schedule probe if necessary */ 2718 /* schedule probe if necessary */
2603 if (ata_eh_schedule_probe(dev)) 2719 if (ata_eh_schedule_probe(dev)) {
2604 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2720 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
2721 memset(ehc->cmd_timeout_idx[dev->devno], 0,
2722 sizeof(ehc->cmd_timeout_idx[dev->devno]));
2723 }
2605 2724
2606 return 1; 2725 return 1;
2607 } else { 2726 } else {
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 1cf803adbc95..f6f9c28ec7f8 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -151,6 +151,8 @@ extern void ata_scsi_dev_rescan(struct work_struct *work);
151extern int ata_bus_probe(struct ata_port *ap); 151extern int ata_bus_probe(struct ata_port *ap);
152 152
153/* libata-eh.c */ 153/* libata-eh.c */
154extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
155extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd);
154extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); 156extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
155extern void ata_scsi_error(struct Scsi_Host *host); 157extern void ata_scsi_error(struct Scsi_Host *host);
156extern void ata_port_wait_eh(struct ata_port *ap); 158extern void ata_port_wait_eh(struct ata_port *ap);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 9058c2a325a9..035f8e1cd0ac 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -237,7 +237,6 @@ enum {
237 /* various lengths of time */ 237 /* various lengths of time */
238 ATA_TMOUT_BOOT = 30000, /* heuristic */ 238 ATA_TMOUT_BOOT = 30000, /* heuristic */
239 ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */ 239 ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */
240 ATA_TMOUT_INTERNAL = 30000,
241 ATA_TMOUT_INTERNAL_QUICK = 5000, 240 ATA_TMOUT_INTERNAL_QUICK = 5000,
242 241
243 /* FIXME: GoVault needs 2s but we can't afford that without 242 /* FIXME: GoVault needs 2s but we can't afford that without
@@ -341,6 +340,11 @@ enum {
341 340
342 SATA_PMP_RW_TIMEOUT = 3000, /* PMP read/write timeout */ 341 SATA_PMP_RW_TIMEOUT = 3000, /* PMP read/write timeout */
343 342
343 /* This should match the actual table size of
344 * ata_eh_cmd_timeout_table in libata-eh.c.
345 */
346 ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 5,
347
344 /* Horkage types. May be set by libata or controller on drives 348 /* Horkage types. May be set by libata or controller on drives
345 (some horkage may be drive/controller pair dependant */ 349 (some horkage may be drive/controller pair dependant */
346 350
@@ -598,6 +602,8 @@ struct ata_eh_info {
598struct ata_eh_context { 602struct ata_eh_context {
599 struct ata_eh_info i; 603 struct ata_eh_info i;
600 int tries[ATA_MAX_DEVICES]; 604 int tries[ATA_MAX_DEVICES];
605 int cmd_timeout_idx[ATA_MAX_DEVICES]
606 [ATA_EH_CMD_TIMEOUT_TABLE_SIZE];
601 unsigned int classes[ATA_MAX_DEVICES]; 607 unsigned int classes[ATA_MAX_DEVICES];
602 unsigned int did_probe_mask; 608 unsigned int did_probe_mask;
603 unsigned int saved_ncq_enabled; 609 unsigned int saved_ncq_enabled;